Remove threading from fictionwise and nicebooks

Author: Sengian
Date: 2010-12-11 23:19:25 +01:00
parent b2004ad77b
commit 1d968f71b7
2 changed files with 41 additions and 72 deletions
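
This commit replaces the threaded fetch pipeline in the fictionwise and nicebooks
metadata plugins (a producer thread feeding BrowserThread workers into a Queue,
drained by a consumer thread) with a plain sequential loop over one shared browser
object. A minimal sketch of the pattern, using hypothetical names
(populate_sequential, fetch_one) rather than the plugin API itself:

    # Hypothetical sketch of the refactoring pattern, not the calibre code.
    def populate_sequential(urls, fetch_one):
        # fetch_one(url) is assumed to return a parsed page, or None on a 404.
        results = []
        for url in urls:
            entry = fetch_one(url)   # one blocking request at a time, no Queue/Thread
            if entry is not None:
                results.append(entry)
        return results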

fictionwise.py

@@ -337,6 +337,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
            min_viewability='none', verbose=False, max_results=5,
            keywords=None):
     br = browser()
+    islink = False
     entries, islink = Query(title=title, author=author, publisher=publisher,
         keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

nicebooks.py

@@ -80,46 +80,6 @@ class NiceBooksError(Exception):
 class ISBNNotFound(NiceBooksError):
     pass
 
-class BrowserThread(Thread):
-
-    def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
-        self.url = url
-        self.ex = ex
-        self.plugname = name
-        self.verbose = verbose
-        self.timeout = timeout
-        self.result = None
-        Thread.__init__(self)
-
-    def get_result(self):
-        return self.result
-
-    def run(self):
-        try:
-            raw = browser().open_novisit(self.url, timeout=self.timeout).read()
-        except Exception, e:
-            report(self.verbose)
-            if callable(getattr(e, 'getcode', None)) and \
-                    e.getcode() == 404:
-                self.result = None
-            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
-                raise self.ex(_('%s timed out. Try again later.') % self.plugname)
-            raise self.ex(_('%s encountered an error.') % self.plugname)
-        if '<title>404 - ' in raw:
-            report(self.verbose)
-            self.result = None
-            return None
-        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-                resolve_entities=True)[0]
-        try:
-            self.result = soupparser.fromstring(raw)
-        except:
-            try:
-                #remove ASCII invalid chars
-                self.result = soupparser.fromstring(clean_ascii_chars(raw))
-            except:
-                self.result = None
-
 def report(verbose):
     if verbose:
         traceback.print_exc()
@@ -156,7 +116,7 @@ class Query(object):
             report(verbose)
             if callable(getattr(e, 'getcode', None)) and \
                     e.getcode() == 404:
-                return
+                return None
             if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                 raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
             raise NiceBooksError(_('Nicebooks encountered an error.'))
@@ -178,7 +138,7 @@ class Query(object):
             nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
         except:
             #direct hit
-            return [feed]
+            return [feed], False
 
         nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
         pages =[feed]
@@ -207,13 +167,14 @@ class Query(object):
         for x in pages:
             results.extend([i.find_class('title')[0].get('href') \
                 for i in x.xpath("//ul[@id='results']/li")])
-        return results[:self.max_results]
+        return results[:self.max_results], True
 
 class ResultList(list):
 
     BASE_URL = 'http://fr.nicebooks.com'
 
-    def __init__(self):
+    def __init__(self, islink):
+        self.islink = islink
         self.repub = re.compile(u'\s*.diteur\s*', re.I)
         self.reauteur = re.compile(u'\s*auteur.*', re.I)
         self.reautclean = re.compile(u'\s*\(.*\)\s*')
@@ -287,36 +248,42 @@ class ResultList(list):
             pass
         return mi
 
-    def producer(self, q, data, verbose=False):
-        for x in data:
-            thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=NiceBooksError,
-                        name='Nicebooks')
-            thread.start()
-            q.put(thread, True)
-
-    def consumer(self, q, total_entries, verbose=False):
-        while len(self) < total_entries:
-            thread = q.get(True)
-            thread.join()
-            mi = thread.get_result()
-            if mi is None:
-                self.append(None)
-            else:
-                self.append(self.fill_MI(mi, verbose))
-
-    def populate(self, entries, verbose=False, brcall=3):
-        if len(entries) == 1 and not isinstance(entries[0], str):
+    def get_individual_metadata(self, url, br, verbose):
+        try:
+            raw = br.open_novisit(url).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return None
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('NiceBooks timed out. Try again later.'))
+            raise NiceBooksError(_('NiceBooks encountered an error.'))
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return None
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                report(verbose)
+                return None
+
+    def populate(self, entries, br, verbose=False):
+        if not self.islink:
             #single entry
             self.append(self.fill_MI(entries[0], verbose))
         else:
             #multiple entries
-            q = Queue(brcall)
-            prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
-            cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
-            prod_thread.start()
-            cons_thread.start()
-            prod_thread.join()
-            cons_thread.join()
+            for x in entries:
+                entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
+                if entry is not None:
+                    self.append(self.fill_MI(entry, verbose))
 
 class Covers(object):
@@ -358,15 +325,16 @@ class Covers(object):
 def search(title=None, author=None, publisher=None, isbn=None,
            max_results=5, verbose=False, keywords=None):
     br = browser()
-    entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+    islink = False
+    entries, islink = Query(title=title, author=author, isbn=isbn, publisher=publisher,
         keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
 
     if entries is None or len(entries) == 0:
         return None
 
     #List of entry
-    ans = ResultList()
-    ans.populate(entries, verbose)
+    ans = ResultList(islink)
+    ans.populate(entries, br, verbose)
     return [x for x in ans if x is not None]
 
 def check_for_cover(isbn):
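
Taken together, the nicebooks hunks also change Query's calling contract: invoking a
Query instance on a browser now returns a pair (entries, islink) instead of a bare
list. On a direct hit, islink is False and entries holds the single already-parsed
result page; otherwise islink is True and entries is a list of result-page hrefs
that ResultList.populate() fetches one at a time through the shared browser, which
replaces the old len(entries) == 1 heuristic. A toy model of that contract, with
stand-in names (toy_query, toy_populate, fetch) rather than the real plugin classes:

    # Stand-in sketch of the (entries, islink) contract -- not the plugin code.
    def toy_query(parsed_page, hrefs):
        if not hrefs:              # direct hit: return the parsed page itself
            return [parsed_page], False
        return hrefs, True         # search results: return links still to fetch

    def toy_populate(entries, islink, fetch):
        if not islink:
            return [entries[0]]    # already parsed, nothing left to fetch
        # link list: fetch each page sequentially, dropping failures (None)
        return [p for p in (fetch(h) for h in entries) if p is not None]

    entries, islink = toy_query(None, ['/book/1', '/book/2'])
    print(toy_populate(entries, islink, fetch=lambda h: 'page for ' + h))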