From 81af8382d630175c34157effb2fd104577dba2e0 Mon Sep 17 00:00:00 2001 From: Sengian Date: Mon, 13 Dec 2010 23:24:12 +0100 Subject: [PATCH] cleaning --- src/calibre/ebooks/metadata/amazon.py | 65 +++++++++++---------- src/calibre/ebooks/metadata/fictionwise.py | 5 +- src/calibre/ebooks/metadata/google_books.py | 6 +- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 1362349685..aec4fb313a 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -2,7 +2,7 @@ from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = '2010, sengian ' -import sys, textwrap, re, traceback, socket +import sys, re from threading import Thread from Queue import Queue from urllib import urlencode @@ -61,6 +61,7 @@ class Amazon(MetadataSource): tempres.extend(tmpnoloc) self.results = tempres except Exception, e: + import traceback self.exception = e self.tb = traceback.format_exc() @@ -107,12 +108,14 @@ class AmazonSocial(MetadataSource): tmploc.tags = tmpnoloc.tags self.results = tmploc except Exception, e: + import traceback self.exception = e self.tb = traceback.format_exc() def report(verbose): if verbose: + import traceback traceback.print_exc() class AmazonError(Exception): @@ -208,33 +211,40 @@ class Query(object): q = q.encode('utf-8') self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q) - def __call__(self, browser, verbose, timeout = 5.): + def brcall(self, browser, url, verbose, timeout): if verbose: - print _('Query: %s') % self.urldata - + print _('Query: %s') % url + try: - raw = browser.open_novisit(self.urldata, timeout=timeout).read() + raw = browser.open_novisit(url, timeout=timeout).read() except Exception, e: + import socket report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: - return None, self.urldata - if isinstance(getattr(e, 'args', [None])[0], socket.timeout): - raise AmazonError(_('Amazon timed out. Try again later.')) - raise AmazonError(_('Amazon encountered an error.')) + return None + attr = getattr(e, 'args', [None]) + attr = attr if attr else [None] + if isinstance(attr[0], socket.timeout): + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) + raise NiceBooksError(_('Nicebooks encountered an error.')) if '404 - ' in raw: - return None, self.urldata + return raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] - try: - feed = soupparser.fromstring(raw) + return soupparser.fromstring(raw) except: try: #remove ASCII invalid chars return soupparser.fromstring(clean_ascii_chars(raw)) except: - return None, self.urldata + return None + + def __call__(self, browser, verbose, timeout = 5.): + feed = self.brcall(browser, self.urldata, verbose, timeout) + if feed is None: + return None, self.urldata #nb of page try: @@ -247,23 +257,10 @@ class Query(object): if len(nbresults) > 1: nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1]))) for i in xrange(2, nbpagetoquery + 1): - try: - urldata = self.urldata + '&page=' + str(i) - raw = browser.open_novisit(urldata, timeout=timeout).read() - except Exception, e: + urldata = self.urldata + '&page=' + str(i) + feed = self.brcall(browser, urldata, verbose, timeout) + if feed is None: continue - if '<title>404 - ' in raw: - continue - raw = xml_to_unicode(raw, strip_encoding_pats=True, - resolve_entities=True)[0] - try: - feed = soupparser.fromstring(raw) - except: - try: - #remove ASCII invalid chars - return soupparser.fromstring(clean_ascii_chars(raw)) - except: - continue pages.append(feed) results = [] @@ -453,11 +450,14 @@ class ResultList(object): try: raw = br.open_novisit(url).read() except Exception, e: + import socket report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return None - if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + attr = getattr(e, 'args', [None]) + attr = attr if attr else [None] + if isinstance(attr[0], socket.timeout): raise AmazonError(_('Amazon timed out. Try again later.')) raise AmazonError(_('Amazon encountered an error.')) if '<title>404 - ' in raw: @@ -584,6 +584,7 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False, return [mi] def option_parser(): + import textwrap parser = OptionParser(textwrap.dedent(\ _('''\ %prog [options] @@ -648,6 +649,6 @@ if __name__ == '__main__': sys.exit(main()) # import cProfile # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()")) - # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2")) + # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile")) -# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html \ No newline at end of file +# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html \ No newline at end of file diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py index 9eabcb2ca8..a50bb2ce04 100644 --- a/src/calibre/ebooks/metadata/fictionwise.py +++ b/src/calibre/ebooks/metadata/fictionwise.py @@ -14,11 +14,12 @@ from calibre import browser, preferred_encoding from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata import MetaInformation, check_isbn, \ authors_to_sort_string -from calibre.library.comments import sanitize_comments_html from calibre.ebooks.metadata.fetch import MetadataSource +from calibre.library.comments import sanitize_comments_html from calibre.utils.config import OptionParser -from calibre.utils.date import parse_date, utcnow from calibre.utils.cleantext import clean_ascii_chars, unescape +from calibre.utils.date import parse_date, utcnow + class Fictionwise(MetadataSource): diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py index cac3cac7d0..765bb4a255 100644 --- a/src/calibre/ebooks/metadata/google_books.py +++ b/src/calibre/ebooks/metadata/google_books.py @@ -1,6 +1,6 @@ from __future__ import with_statement __license__ = 'GPL 3' -__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' +__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>, 2010, sengian <sengian1@gmail.com>' __docformat__ = 'restructuredtext en' import sys @@ -12,13 +12,13 @@ from functools import partial from lxml import etree from calibre import browser, preferred_encoding +from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata import MetaInformation, check_isbn, \ authors_to_sort_string from calibre.ebooks.metadata.fetch import MetadataSource -from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.config import OptionParser -from calibre.utils.date import parse_date, utcnow from calibre.utils.cleantext import clean_ascii_chars +from calibre.utils.date import parse_date, utcnow NAMESPACES = { 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',