From b2004ad77bb3e1d7f6630f417740cc3cbd089cb1 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 22:41:37 +0100
Subject: [PATCH] Remove threading from fictionwise

---
 src/calibre/ebooks/metadata/fictionwise.py | 112 +++++++--------------
 1 file changed, 38 insertions(+), 74 deletions(-)

diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index efb19ca249..418a8ca771 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -4,8 +4,6 @@ __copyright__ = '2010, sengian '
 __docformat__ = 'restructuredtext en'
 
 import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
 from urllib import urlencode
 
 from lxml.html import soupparser, tostring
@@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars, unescape
 
-class Fictionwise(MetadataSource): # {{{
+class Fictionwise(MetadataSource):
 
     author = 'Sengian'
     name = 'Fictionwise'
@@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
             self.exception = e
             self.tb = traceback.format_exc()
 
-    # }}}
 
 class FictionwiseError(Exception):
     pass
 
-class BrowserThread(Thread):
-
-    def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
-        self.url = url
-        self.ex = ex
-        self.plugname = name
-        self.verbose = verbose
-        self.timeout = timeout
-        self.result = None
-        Thread.__init__(self)
-
-    def get_result(self):
-        return self.result
-
-    def run(self):
-        try:
-            raw = browser().open_novisit(self.url, timeout=self.timeout).read()
-        except Exception, e:
-            report(self.verbose)
-            if callable(getattr(e, 'getcode', None)) and \
-                    e.getcode() == 404:
-                self.result = None
-            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
-                raise self.ex(_('%s timed out. Try again later.') % self.plugname)
-            raise self.ex(_('%s encountered an error.') % self.plugname)
-        if '<title>404 - ' in raw:
-            report(self.verbose)
-            self.result = None
-            return None
-        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-                resolve_entities=True)[0]
-        try:
-            self.result = soupparser.fromstring(raw)
-        except:
-            try:
-                #remove ASCII invalid chars
-                self.result = soupparser.fromstring(clean_ascii_chars(raw))
-            except:
-                self.result = None
-
 def report(verbose):
     if verbose:
         traceback.print_exc()
@@ -161,15 +118,16 @@
         results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
         #return feed if no links ie normally a single book or nothing
         if not results:
-            results = [feed]
-        return results
+            return [feed], False
+        return results, True
 
 class ResultList(list):
 
     BASE_URL = 'http://www.fictionwise.com'
     COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
 
-    def __init__(self):
+    def __init__(self, islink):
+        self.islink = islink
         self.retitle = re.compile(r'\[[^\[\]]+\]')
         self.rechkauth = re.compile(r'.*book\s*by', re.I)
         self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@@ -337,47 +295,53 @@
                 pass
         return mi
 
-    def producer(self, q, data, verbose=False):
-        for x in data:
-            thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
-                        name='Fictionwise')
-            thread.start()
-            q.put(thread, True)
+    def get_individual_metadata(self, url, br, verbose):
+        try:
+            raw = br.open_novisit(url).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return None
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return None
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                report(verbose)
+                return None
 
-    def consumer(self, q, total_entries, verbose=False):
-        while len(self) < total_entries:
-            thread = q.get(True)
-            thread.join()
-            mi = thread.get_result()
-            if mi is None:
-                self.append(None)
-            else:
-                self.append(self.fill_MI(mi, verbose))
-
-    def populate(self, entries, verbose=False, brcall=3):
-        if len(entries) == 1 and not isinstance(entries[0], str):
+    def populate(self, entries, br, verbose=False):
+        if not self.islink:
             #single entry
             self.append(self.fill_MI(entries[0], verbose))
         else:
             #multiple entries
-            q = Queue(brcall)
-            prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
-            cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
-            prod_thread.start()
-            cons_thread.start()
-            prod_thread.join()
-            cons_thread.join()
+            for x in entries:
+                entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
+                if entry is not None:
+                    self.append(self.fill_MI(entry, verbose))
 
 
 def search(title=None, author=None, publisher=None, isbn=None,
            min_viewability='none', verbose=False, max_results=5,
            keywords=None):
     br = browser()
-    entries = Query(title=title, author=author, publisher=publisher,
+    entries, islink = Query(title=title, author=author, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
 
     #List of entry
-    ans = ResultList()
+    ans = ResultList(islink)
     ans.populate(entries, br, verbose)
     return [x for x in ans if x is not None]
 
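
Not part of the patch itself: a minimal sketch of how this code path is exercised after the change, assuming a calibre source checkout on sys.path. The search() signature is taken from the diff above; the query values are invented for illustration. ResultList.populate() now walks the result links sequentially over the single shared browser instead of handing them to BrowserThread workers through a Queue.

    # Illustrative sketch only, not part of the patch. Python 2, matching the module.
    from calibre.ebooks.metadata.fictionwise import search

    # Hypothetical query values; any title/author pair works the same way.
    results = search(title='some title', author='some author',
                     max_results=5, verbose=True)
    for mi in results:
        # MetaInformation objects, fetched one result page at a time
        print mi.title, ', '.join(mi.authors)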