Remove threading from fictionwise

2025-07-09 03:04:10 -04:00 · 2010-12-11 22:41:37 +01:00 · 2010-12-11 22:41:37 +01:00 · b2004ad77b
commit b2004ad77b
parent d5bc18b5c2
1 changed file with 38 additions and 74 deletions
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -4,8 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'
 import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
 from urllib import urlencode
 from lxml.html import soupparser, tostring
@@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars, unescape
-class Fictionwise(MetadataSource): # {{{
+class Fictionwise(MetadataSource):
    author = 'Sengian'
    name = 'Fictionwise'
@@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
            self.exception = e
            self.tb = traceback.format_exc()
    # }}}
 class FictionwiseError(Exception):
    pass
 class BrowserThread(Thread):
    def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
        self.url = url
        self.ex = ex
        self.plugname = name
        self.verbose = verbose
        self.timeout = timeout
        self.result = None
        Thread.__init__(self)
    def get_result(self):
        return self.result
    def run(self):
        try:
            raw = browser().open_novisit(self.url, timeout=self.timeout).read()
        except Exception, e:
            report(self.verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.result = None
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise self.ex(_('%s timed out. Try again later.') % self.plugname)
            raise self.ex(_('%s encountered an error.') % self.plugname)
        if '<title>404 - ' in raw:
            report(self.verbose)
            self.result = None
            return None
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            self.result = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                self.result = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                self.result = None
 def report(verbose):
    if verbose:
        traceback.print_exc()
@@ -161,15 +118,16 @@ class Query(object):
        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
        #return feed if no links ie normally a single book or nothing
        if not results:
-            results = [feed]
+            return [feed], False
-        return results
+        return results, True
 class ResultList(list):
    BASE_URL = 'http://www.fictionwise.com'
    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
-    def __init__(self):
+    def __init__(self, islink):
        self.islink = islink
        self.retitle = re.compile(r'\[[^\[\]]+\]')
        self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@@ -337,47 +295,53 @@ class ResultList(list):
            pass
        return mi
-    def producer(self, q, data, verbose=False):
+    def get_individual_metadata(self, url, br, verbose):
-        for x in data:
+        try:
-            thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
+            raw = br.open_novisit(url).read()
-                name='Fictionwise')
+        except Exception, e:
-            thread.start()
+            report(verbose)
-            q.put(thread, True)
+            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return None
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return None
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return None
-    def consumer(self, q, total_entries, verbose=False):
+    def populate(self, entries, br, verbose=False):
-        while len(self) < total_entries:
+        if not self.islink:
            thread = q.get(True)
            thread.join()
            mi = thread.get_result()
            if mi is None:
                self.append(None)
            else:
                self.append(self.fill_MI(mi, verbose))
    def populate(self, entries, verbose=False, brcall=3):
        if len(entries) == 1 and not isinstance(entries[0], str):
            #single entry
            self.append(self.fill_MI(entries[0], verbose))
        else:
            #multiple entries
-            q = Queue(brcall)
+            for x in entries:
-            prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
+                entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
-            cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
+                if entry is not None:
-            prod_thread.start()
+                    self.append(self.fill_MI(entry, verbose))
            cons_thread.start()
            prod_thread.join()
            cons_thread.join()
 def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
            keywords=None):
    br = browser()
-    entries = Query(title=title, author=author, publisher=publisher,
+    entries, islink = Query(title=title, author=author, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
    #List of entry
-    ans = ResultList()
+    ans = ResultList(islink)
    ans.populate(entries, br, verbose)
    return [x for x in ans if x is not None]