Remove threading from fictionwise

This commit is contained in:
Sengian 2010-12-11 22:41:37 +01:00
parent d5bc18b5c2
commit b2004ad77b

View File

@ -4,8 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket import sys, textwrap, re, traceback, socket
from threading import Thread
from Queue import Queue
from urllib import urlencode from urllib import urlencode
from lxml.html import soupparser, tostring from lxml.html import soupparser, tostring
@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars, unescape from calibre.utils.cleantext import clean_ascii_chars, unescape
class Fictionwise(MetadataSource): # {{{ class Fictionwise(MetadataSource):
author = 'Sengian' author = 'Sengian'
name = 'Fictionwise' name = 'Fictionwise'
@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
# }}}
class FictionwiseError(Exception): class FictionwiseError(Exception):
pass pass
class BrowserThread(Thread):
def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
self.url = url
self.ex = ex
self.plugname = name
self.verbose = verbose
self.timeout = timeout
self.result = None
Thread.__init__(self)
def get_result(self):
return self.result
def run(self):
try:
raw = browser().open_novisit(self.url, timeout=self.timeout).read()
except Exception, e:
report(self.verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
self.result = None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise self.ex(_('%s timed out. Try again later.') % self.plugname)
raise self.ex(_('%s encountered an error.') % self.plugname)
if '<title>404 - ' in raw:
report(self.verbose)
self.result = None
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
self.result = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
self.result = soupparser.fromstring(clean_ascii_chars(raw))
except:
self.result = None
def report(verbose): def report(verbose):
if verbose: if verbose:
traceback.print_exc() traceback.print_exc()
@ -161,15 +118,16 @@ class Query(object):
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results] results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
#return feed if no links ie normally a single book or nothing #return feed if no links ie normally a single book or nothing
if not results: if not results:
results = [feed] return [feed], False
return results return results, True
class ResultList(list): class ResultList(list):
BASE_URL = 'http://www.fictionwise.com' BASE_URL = 'http://www.fictionwise.com'
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0} COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
def __init__(self): def __init__(self, islink):
self.islink = islink
self.retitle = re.compile(r'\[[^\[\]]+\]') self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I) self.rechkauth = re.compile(r'.*book\s*by', re.I)
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I) self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@ -337,47 +295,53 @@ class ResultList(list):
pass pass
return mi return mi
def producer(self, q, data, verbose=False): def get_individual_metadata(self, url, br, verbose):
for x in data: try:
thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError, raw = br.open_novisit(url).read()
name='Fictionwise') except Exception, e:
thread.start() report(verbose)
q.put(thread, True) if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
return None
def consumer(self, q, total_entries, verbose=False): def populate(self, entries, br, verbose=False):
while len(self) < total_entries: if not self.islink:
thread = q.get(True)
thread.join()
mi = thread.get_result()
if mi is None:
self.append(None)
else:
self.append(self.fill_MI(mi, verbose))
def populate(self, entries, verbose=False, brcall=3):
if len(entries) == 1 and not isinstance(entries[0], str):
#single entry #single entry
self.append(self.fill_MI(entries[0], verbose)) self.append(self.fill_MI(entries[0], verbose))
else: else:
#multiple entries #multiple entries
q = Queue(brcall) for x in entries:
prod_thread = Thread(target=self.producer, args=(q, entries, verbose)) entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose)) if entry is not None:
prod_thread.start() self.append(self.fill_MI(entry, verbose))
cons_thread.start()
prod_thread.join()
cons_thread.join()
def search(title=None, author=None, publisher=None, isbn=None, def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=5, min_viewability='none', verbose=False, max_results=5,
keywords=None): keywords=None):
br = browser() br = browser()
entries = Query(title=title, author=author, publisher=publisher, entries, islink = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.) keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
#List of entry #List of entry
ans = ResultList() ans = ResultList(islink)
ans.populate(entries, br, verbose) ans.populate(entries, br, verbose)
return [x for x in ans if x is not None] return [x for x in ans if x is not None]