mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove threading from fictionwise
This commit is contained in:
parent
d5bc18b5c2
commit
b2004ad77b
@ -4,8 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, textwrap, re, traceback, socket
|
import sys, textwrap, re, traceback, socket
|
||||||
from threading import Thread
|
|
||||||
from Queue import Queue
|
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
|
||||||
from lxml.html import soupparser, tostring
|
from lxml.html import soupparser, tostring
|
||||||
@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
|
|||||||
from calibre.utils.date import parse_date, utcnow
|
from calibre.utils.date import parse_date, utcnow
|
||||||
from calibre.utils.cleantext import clean_ascii_chars, unescape
|
from calibre.utils.cleantext import clean_ascii_chars, unescape
|
||||||
|
|
||||||
class Fictionwise(MetadataSource): # {{{
|
class Fictionwise(MetadataSource):
|
||||||
|
|
||||||
author = 'Sengian'
|
author = 'Sengian'
|
||||||
name = 'Fictionwise'
|
name = 'Fictionwise'
|
||||||
@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
|
|||||||
self.exception = e
|
self.exception = e
|
||||||
self.tb = traceback.format_exc()
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class FictionwiseError(Exception):
|
class FictionwiseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class BrowserThread(Thread):
|
|
||||||
|
|
||||||
def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
|
|
||||||
self.url = url
|
|
||||||
self.ex = ex
|
|
||||||
self.plugname = name
|
|
||||||
self.verbose = verbose
|
|
||||||
self.timeout = timeout
|
|
||||||
self.result = None
|
|
||||||
Thread.__init__(self)
|
|
||||||
|
|
||||||
def get_result(self):
|
|
||||||
return self.result
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
try:
|
|
||||||
raw = browser().open_novisit(self.url, timeout=self.timeout).read()
|
|
||||||
except Exception, e:
|
|
||||||
report(self.verbose)
|
|
||||||
if callable(getattr(e, 'getcode', None)) and \
|
|
||||||
e.getcode() == 404:
|
|
||||||
self.result = None
|
|
||||||
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
|
||||||
raise self.ex(_('%s timed out. Try again later.') % self.plugname)
|
|
||||||
raise self.ex(_('%s encountered an error.') % self.plugname)
|
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
report(self.verbose)
|
|
||||||
self.result = None
|
|
||||||
return None
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
try:
|
|
||||||
self.result = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
self.result = soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
self.result = None
|
|
||||||
|
|
||||||
def report(verbose):
|
def report(verbose):
|
||||||
if verbose:
|
if verbose:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
@ -161,15 +118,16 @@ class Query(object):
|
|||||||
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
|
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
|
||||||
#return feed if no links ie normally a single book or nothing
|
#return feed if no links ie normally a single book or nothing
|
||||||
if not results:
|
if not results:
|
||||||
results = [feed]
|
return [feed], False
|
||||||
return results
|
return results, True
|
||||||
|
|
||||||
class ResultList(list):
|
class ResultList(list):
|
||||||
|
|
||||||
BASE_URL = 'http://www.fictionwise.com'
|
BASE_URL = 'http://www.fictionwise.com'
|
||||||
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
|
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, islink):
|
||||||
|
self.islink = islink
|
||||||
self.retitle = re.compile(r'\[[^\[\]]+\]')
|
self.retitle = re.compile(r'\[[^\[\]]+\]')
|
||||||
self.rechkauth = re.compile(r'.*book\s*by', re.I)
|
self.rechkauth = re.compile(r'.*book\s*by', re.I)
|
||||||
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
|
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
|
||||||
@ -337,47 +295,53 @@ class ResultList(list):
|
|||||||
pass
|
pass
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
def producer(self, q, data, verbose=False):
|
def get_individual_metadata(self, url, br, verbose):
|
||||||
for x in data:
|
try:
|
||||||
thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
|
raw = br.open_novisit(url).read()
|
||||||
name='Fictionwise')
|
except Exception, e:
|
||||||
thread.start()
|
report(verbose)
|
||||||
q.put(thread, True)
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
|
e.getcode() == 404:
|
||||||
|
return None
|
||||||
|
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
||||||
|
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
|
||||||
|
raise FictionwiseError(_('Fictionwise encountered an error.'))
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
report(verbose)
|
||||||
|
return None
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
return soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return None
|
||||||
|
|
||||||
def consumer(self, q, total_entries, verbose=False):
|
def populate(self, entries, br, verbose=False):
|
||||||
while len(self) < total_entries:
|
if not self.islink:
|
||||||
thread = q.get(True)
|
|
||||||
thread.join()
|
|
||||||
mi = thread.get_result()
|
|
||||||
if mi is None:
|
|
||||||
self.append(None)
|
|
||||||
else:
|
|
||||||
self.append(self.fill_MI(mi, verbose))
|
|
||||||
|
|
||||||
def populate(self, entries, verbose=False, brcall=3):
|
|
||||||
if len(entries) == 1 and not isinstance(entries[0], str):
|
|
||||||
#single entry
|
#single entry
|
||||||
self.append(self.fill_MI(entries[0], verbose))
|
self.append(self.fill_MI(entries[0], verbose))
|
||||||
else:
|
else:
|
||||||
#multiple entries
|
#multiple entries
|
||||||
q = Queue(brcall)
|
for x in entries:
|
||||||
prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
|
entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
|
||||||
cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
|
if entry is not None:
|
||||||
prod_thread.start()
|
self.append(self.fill_MI(entry, verbose))
|
||||||
cons_thread.start()
|
|
||||||
prod_thread.join()
|
|
||||||
cons_thread.join()
|
|
||||||
|
|
||||||
|
|
||||||
def search(title=None, author=None, publisher=None, isbn=None,
|
def search(title=None, author=None, publisher=None, isbn=None,
|
||||||
min_viewability='none', verbose=False, max_results=5,
|
min_viewability='none', verbose=False, max_results=5,
|
||||||
keywords=None):
|
keywords=None):
|
||||||
br = browser()
|
br = browser()
|
||||||
entries = Query(title=title, author=author, publisher=publisher,
|
entries, islink = Query(title=title, author=author, publisher=publisher,
|
||||||
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
|
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
|
||||||
|
|
||||||
#List of entry
|
#List of entry
|
||||||
ans = ResultList()
|
ans = ResultList(islink)
|
||||||
ans.populate(entries, br, verbose)
|
ans.populate(entries, br, verbose)
|
||||||
return [x for x in ans if x is not None]
|
return [x for x in ans if x is not None]
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user