cleaning

2025-08-30 23:00:21 -04:00 · 2010-12-13 23:24:12 +01:00 · 2010-12-13 23:24:12 +01:00 · 81af8382d6
commit 81af8382d6
parent d374b36e97
3 changed files with 39 additions and 37 deletions
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2010, sengian <sengian1@gmail.com>'

-import sys, textwrap, re, traceback, socket
+import sys, re
 from threading import Thread
 from Queue import Queue
 from urllib import urlencode
@@ -61,6 +61,7 @@ class Amazon(MetadataSource):
                        tempres.extend(tmpnoloc)
                self.results = tempres
        except Exception, e:
+            import traceback
            self.exception = e
            self.tb = traceback.format_exc()

@@ -107,12 +108,14 @@ class AmazonSocial(MetadataSource):
                            tmploc.tags = tmpnoloc.tags
                self.results = tmploc
        except Exception, e:
+            import traceback
            self.exception = e
            self.tb = traceback.format_exc()


 def report(verbose):
    if verbose:
+        import traceback
        traceback.print_exc()

 class AmazonError(Exception):
@@ -208,33 +211,40 @@ class Query(object):
            q = q.encode('utf-8')
        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)

-    def __call__(self, browser, verbose, timeout = 5.):
+    def brcall(self, browser, url, verbose, timeout):
        if verbose:
-            print _('Query: %s') % self.urldata
+            print _('Query: %s') % url
        
        try:
-            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+            raw = browser.open_novisit(url, timeout=timeout).read()
        except Exception, e:
+            import socket
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
-                return None, self.urldata
-            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
-                raise AmazonError(_('Amazon timed out. Try again later.'))
-            raise AmazonError(_('Amazon encountered an error.'))
+                return None
+            attr = getattr(e, 'args', [None])
+            attr = attr if attr else [None]
+            if isinstance(attr[0], socket.timeout):
+                raise AmazonError(_('Amazon timed out. Try again later.'))
+            raise AmazonError(_('Amazon encountered an error.'))
        if '<title>404 - ' in raw:
-            return None, self.urldata
+            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
-
        try:
-            feed = soupparser.fromstring(raw)
+            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
-                return None, self.urldata
+                return None
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        feed = self.brcall(browser, self.urldata, verbose, timeout)
+        if feed is None:
+            return None, self.urldata

        #nb of page
        try:
@@ -247,23 +257,10 @@ class Query(object):
        if len(nbresults) > 1:
            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
            for i in xrange(2, nbpagetoquery + 1):
-                try:
-                    urldata = self.urldata + '&page=' + str(i)
-                    raw = browser.open_novisit(urldata, timeout=timeout).read()
-                except Exception, e:
+                urldata = self.urldata + '&page=' + str(i)
+                feed = self.brcall(browser, urldata, verbose, timeout)
+                if feed is None:
                    continue
-                if '<title>404 - ' in raw:
-                    continue
-                raw = xml_to_unicode(raw, strip_encoding_pats=True,
-                        resolve_entities=True)[0]
-                try:
-                    feed = soupparser.fromstring(raw)
-                except:
-                    try:
-                        #remove ASCII invalid chars
-                        return soupparser.fromstring(clean_ascii_chars(raw))
-                    except:
-                        continue
                pages.append(feed)

        results = []
@@ -453,11 +450,14 @@ class ResultList(object):
        try:
            raw = br.open_novisit(url).read()
        except Exception, e:
+            import socket
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return None
-            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+            attr = getattr(e, 'args', [None])
+            attr = attr if attr else [None]
+            if isinstance(attr[0], socket.timeout):
                raise AmazonError(_('Amazon timed out. Try again later.'))
            raise AmazonError(_('Amazon encountered an error.'))
        if '<title>404 - ' in raw:
@@ -584,6 +584,7 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False,
    return [mi]

 def option_parser():
+    import textwrap
    parser = OptionParser(textwrap.dedent(\
    _('''\
        %prog [options]
@@ -648,6 +649,6 @@ if __name__ == '__main__':
    sys.exit(main())
    # import cProfile
    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
-    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
+    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))

-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -14,11 +14,12 @@ from calibre import browser, preferred_encoding
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
-from calibre.library.comments import sanitize_comments_html
 from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.library.comments import sanitize_comments_html
 from calibre.utils.config import OptionParser
-from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars, unescape
+from calibre.utils.date import parse_date, utcnow
+

 class Fictionwise(MetadataSource):

--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -1,6 +1,6 @@
 from __future__ import with_statement
 __license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>, 2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import sys
@@ -12,13 +12,13 @@ from functools import partial
 from lxml import etree

 from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
 from calibre.ebooks.metadata.fetch import MetadataSource
-from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.config import OptionParser
-from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.date import parse_date, utcnow

 NAMESPACES = {
              'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',