This commit is contained in:
Sengian 2010-12-13 23:24:12 +01:00
parent d374b36e97
commit 81af8382d6
3 changed files with 39 additions and 37 deletions

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
import sys, textwrap, re, traceback, socket
import sys, re
from threading import Thread
from Queue import Queue
from urllib import urlencode
@ -61,6 +61,7 @@ class Amazon(MetadataSource):
tempres.extend(tmpnoloc)
self.results = tempres
except Exception, e:
import traceback
self.exception = e
self.tb = traceback.format_exc()
@ -107,12 +108,14 @@ class AmazonSocial(MetadataSource):
tmploc.tags = tmpnoloc.tags
self.results = tmploc
except Exception, e:
import traceback
self.exception = e
self.tb = traceback.format_exc()
def report(verbose):
    '''
    Print the traceback of the exception currently being handled.

    Does nothing unless verbose is true.
    '''
    if not verbose:
        return
    # imported lazily: only needed on the verbose error path
    import traceback
    traceback.print_exc()
class AmazonError(Exception):
@ -208,33 +211,40 @@ class Query(object):
q = q.encode('utf-8')
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
def __call__(self, browser, verbose, timeout = 5.):
def brcall(self, browser, url, verbose, timeout):
if verbose:
print _('Query: %s') % self.urldata
print _('Query: %s') % url
try:
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, e:
import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None, self.urldata
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise AmazonError(_('Amazon timed out. Try again later.'))
raise AmazonError(_('Amazon encountered an error.'))
return None
attr = getattr(e, 'args', [None])
attr = attr if attr else [None]
if isinstance(attr[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
return None, self.urldata
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None, self.urldata
return None
def __call__(self, browser, verbose, timeout = 5.):
feed = self.brcall(browser, self.urldata, verbose, timeout)
if feed is None:
return None, self.urldata
#nb of page
try:
@ -247,23 +257,10 @@ class Query(object):
if len(nbresults) > 1:
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
for i in xrange(2, nbpagetoquery + 1):
try:
urldata = self.urldata + '&page=' + str(i)
raw = browser.open_novisit(urldata, timeout=timeout).read()
except Exception, e:
urldata = self.urldata + '&page=' + str(i)
feed = self.brcall(browser, urldata, verbose, timeout)
if feed is None:
continue
if '<title>404 - ' in raw:
continue
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
continue
pages.append(feed)
results = []
@ -453,11 +450,14 @@ class ResultList(object):
try:
raw = br.open_novisit(url).read()
except Exception, e:
import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
attr = getattr(e, 'args', [None])
attr = attr if attr else [None]
if isinstance(attr[0], socket.timeout):
raise AmazonError(_('Amazon timed out. Try again later.'))
raise AmazonError(_('Amazon encountered an error.'))
if '<title>404 - ' in raw:
@ -584,6 +584,7 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False,
return [mi]
def option_parser():
import textwrap
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
@ -648,6 +649,6 @@ if __name__ == '__main__':
sys.exit(main())
# import cProfile
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html

View File

@ -14,11 +14,12 @@ from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.library.comments import sanitize_comments_html
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars, unescape
from calibre.utils.date import parse_date, utcnow
class Fictionwise(MetadataSource):

View File

@ -1,6 +1,6 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>, 2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys
@ -12,13 +12,13 @@ from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',