mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
cleaning
This commit is contained in:
parent
d374b36e97
commit
81af8382d6
@ -2,7 +2,7 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
||||||
|
|
||||||
import sys, textwrap, re, traceback, socket
|
import sys, re
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from Queue import Queue
|
from Queue import Queue
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
@ -61,6 +61,7 @@ class Amazon(MetadataSource):
|
|||||||
tempres.extend(tmpnoloc)
|
tempres.extend(tmpnoloc)
|
||||||
self.results = tempres
|
self.results = tempres
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import traceback
|
||||||
self.exception = e
|
self.exception = e
|
||||||
self.tb = traceback.format_exc()
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
@ -107,12 +108,14 @@ class AmazonSocial(MetadataSource):
|
|||||||
tmploc.tags = tmpnoloc.tags
|
tmploc.tags = tmpnoloc.tags
|
||||||
self.results = tmploc
|
self.results = tmploc
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import traceback
|
||||||
self.exception = e
|
self.exception = e
|
||||||
self.tb = traceback.format_exc()
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
|
||||||
def report(verbose):
|
def report(verbose):
|
||||||
if verbose:
|
if verbose:
|
||||||
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
class AmazonError(Exception):
|
class AmazonError(Exception):
|
||||||
@ -208,33 +211,40 @@ class Query(object):
|
|||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
|
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
|
||||||
|
|
||||||
def __call__(self, browser, verbose, timeout = 5.):
|
def brcall(self, browser, url, verbose, timeout):
|
||||||
if verbose:
|
if verbose:
|
||||||
print _('Query: %s') % self.urldata
|
print _('Query: %s') % url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import socket
|
||||||
report(verbose)
|
report(verbose)
|
||||||
if callable(getattr(e, 'getcode', None)) and \
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
e.getcode() == 404:
|
e.getcode() == 404:
|
||||||
return None, self.urldata
|
return None
|
||||||
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
attr = getattr(e, 'args', [None])
|
||||||
raise AmazonError(_('Amazon timed out. Try again later.'))
|
attr = attr if attr else [None]
|
||||||
raise AmazonError(_('Amazon encountered an error.'))
|
if isinstance(attr[0], socket.timeout):
|
||||||
|
raise AmazonError(_('Amazon timed out. Try again later.'))
|
||||||
|
raise AmazonError(_('Amazon encountered an error.'))
|
||||||
if '<title>404 - ' in raw:
|
if '<title>404 - ' in raw:
|
||||||
return None, self.urldata
|
return
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
resolve_entities=True)[0]
|
resolve_entities=True)[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
feed = soupparser.fromstring(raw)
|
return soupparser.fromstring(raw)
|
||||||
except:
|
except:
|
||||||
try:
|
try:
|
||||||
#remove ASCII invalid chars
|
#remove ASCII invalid chars
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
except:
|
except:
|
||||||
return None, self.urldata
|
return None
|
||||||
|
|
||||||
|
def __call__(self, browser, verbose, timeout = 5.):
|
||||||
|
feed = self.brcall(browser, self.urldata, verbose, timeout)
|
||||||
|
if feed is None:
|
||||||
|
return None, self.urldata
|
||||||
|
|
||||||
#nb of page
|
#nb of page
|
||||||
try:
|
try:
|
||||||
@ -247,23 +257,10 @@ class Query(object):
|
|||||||
if len(nbresults) > 1:
|
if len(nbresults) > 1:
|
||||||
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
|
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
|
||||||
for i in xrange(2, nbpagetoquery + 1):
|
for i in xrange(2, nbpagetoquery + 1):
|
||||||
try:
|
urldata = self.urldata + '&page=' + str(i)
|
||||||
urldata = self.urldata + '&page=' + str(i)
|
feed = self.brcall(browser, urldata, verbose, timeout)
|
||||||
raw = browser.open_novisit(urldata, timeout=timeout).read()
|
if feed is None:
|
||||||
except Exception, e:
|
|
||||||
continue
|
continue
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
continue
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
try:
|
|
||||||
feed = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
pages.append(feed)
|
pages.append(feed)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
@ -453,11 +450,14 @@ class ResultList(object):
|
|||||||
try:
|
try:
|
||||||
raw = br.open_novisit(url).read()
|
raw = br.open_novisit(url).read()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import socket
|
||||||
report(verbose)
|
report(verbose)
|
||||||
if callable(getattr(e, 'getcode', None)) and \
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
e.getcode() == 404:
|
e.getcode() == 404:
|
||||||
return None
|
return None
|
||||||
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
attr = getattr(e, 'args', [None])
|
||||||
|
attr = attr if attr else [None]
|
||||||
|
if isinstance(attr[0], socket.timeout):
|
||||||
raise AmazonError(_('Amazon timed out. Try again later.'))
|
raise AmazonError(_('Amazon timed out. Try again later.'))
|
||||||
raise AmazonError(_('Amazon encountered an error.'))
|
raise AmazonError(_('Amazon encountered an error.'))
|
||||||
if '<title>404 - ' in raw:
|
if '<title>404 - ' in raw:
|
||||||
@ -584,6 +584,7 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False,
|
|||||||
return [mi]
|
return [mi]
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
|
import textwrap
|
||||||
parser = OptionParser(textwrap.dedent(\
|
parser = OptionParser(textwrap.dedent(\
|
||||||
_('''\
|
_('''\
|
||||||
%prog [options]
|
%prog [options]
|
||||||
@ -648,6 +649,6 @@ if __name__ == '__main__':
|
|||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
# import cProfile
|
# import cProfile
|
||||||
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
|
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
|
||||||
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
|
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))
|
||||||
|
|
||||||
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
|
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
|
@ -14,11 +14,12 @@ from calibre import browser, preferred_encoding
|
|||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
||||||
authors_to_sort_string
|
authors_to_sort_string
|
||||||
from calibre.library.comments import sanitize_comments_html
|
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
|
from calibre.library.comments import sanitize_comments_html
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.utils.date import parse_date, utcnow
|
|
||||||
from calibre.utils.cleantext import clean_ascii_chars, unescape
|
from calibre.utils.cleantext import clean_ascii_chars, unescape
|
||||||
|
from calibre.utils.date import parse_date, utcnow
|
||||||
|
|
||||||
|
|
||||||
class Fictionwise(MetadataSource):
|
class Fictionwise(MetadataSource):
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>, 2010, sengian <sengian1@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
@ -12,13 +12,13 @@ from functools import partial
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import browser, preferred_encoding
|
from calibre import browser, preferred_encoding
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
||||||
authors_to_sort_string
|
authors_to_sort_string
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.utils.date import parse_date, utcnow
|
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
from calibre.utils.date import parse_date, utcnow
|
||||||
|
|
||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user