Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit d4e4c8b156: Replace amazon default plugin
Parent: 43ecf8c40d
@@ -483,7 +483,7 @@ from calibre.devices.kobo.driver import KOBO
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, LibraryThing
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-from calibre.ebooks.metadata.amazonbis import Amazon, AmazonSocial
+from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
 from calibre.ebooks.metadata.fictionwise import Fictionwise
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
         LibraryThingCovers, DoubanCovers
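The import changed in this hunk feeds the plugin list built in the same module; a minimal sketch of that pattern (the `plugins` list below is an assumption, the hunk does not show it):

    # Sketch only: how an import like the one changed above is typically
    # consumed further down in the same file (assumed, not shown here).
    from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial

    plugins = []                        # assumed registry consumed at startup
    plugins += [Amazon, AmazonSocial]   # metadata-source plugins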
@@ -1,130 +1,653 @@

Removed (the previous amazon.py):

#!/usr/bin/env python
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Fetch metadata using Amazon AWS
'''
import sys, re

from lxml import html
from lxml.html import soupparser

from calibre import browser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html

def find_asin(br, isbn):
    q = 'http://www.amazon.com/s?field-keywords='+isbn
    raw = br.open_novisit(q).read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
    root = html.fromstring(raw)
    revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
    revs = [x.get('name') for x in revs]
    if revs:
        return revs[0]

def to_asin(br, isbn):
    if len(isbn) == 13:
        try:
            asin = find_asin(br, isbn)
        except:
            import traceback
            traceback.print_exc()
            asin = None
    else:
        asin = isbn
    return asin


def get_social_metadata(title, authors, publisher, isbn):
    mi = Metadata(title, authors)
    if not isbn:
        return mi
    isbn = check_isbn(isbn)
    if not isbn:
        return mi
    br = browser()
    asin = to_asin(br, isbn)
    if asin and get_metadata(br, asin, mi):
        return mi
    from calibre.ebooks.metadata.xisbn import xisbn
    for i in xisbn.get_associated_isbns(isbn):
        asin = to_asin(br, i)
        if asin and get_metadata(br, asin, mi):
            return mi
    return mi


def get_metadata(br, asin, mi):
    q = 'http://amzn.com/'+asin
    try:
        raw = br.open_novisit(q).read()
    except Exception, e:
        if callable(getattr(e, 'getcode', None)) and \
                e.getcode() == 404:
            return False
        raise
    if '<title>404 - ' in raw:
        return False
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
    try:
        root = soupparser.fromstring(raw)
    except:
        return False
    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
    if ratings:
        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
        r = ratings[0]
        for elem in r.xpath('descendant::*[@title]'):
            t = elem.get('title')
            m = pat.match(t)
            if m is not None:
                try:
                    mi.rating = float(m.group(1))/float(m.group(2)) * 5
                    break
                except:
                    pass
    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
    if desc:
        desc = desc[0]
        for c in desc.xpath('descendant::*[@class="seeAll" or'
                ' @class="emptyClear" or @href]'):
            c.getparent().remove(c)
        desc = html.tostring(desc, method='html', encoding=unicode).strip()
        # remove all attributes from tags
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Collapse whitespace
        #desc = re.sub('\n+', '\n', desc)
        #desc = re.sub(' +', ' ', desc)
        # Remove the notice about text referring to out of print editions
        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
        # Remove comments
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
        mi.comments = sanitize_comments_html(desc)

    return True

def main(args=sys.argv):
    # Test xisbn
    print get_social_metadata('Learning Python', None, None, '8324616489')
    print

    # Test sophisticated comment formatting
    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
    print

    # Random tests
    print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
    print
    print get_social_metadata('The Great Gatsby', None, None, '0743273567')

    return 0

if __name__ == '__main__':
    sys.exit(main())

Added (the new amazon.py, moved over from amazonbis.py):

from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'

import sys, textwrap, re, traceback, socket
from threading import Thread
from Queue import Queue
from urllib import urlencode
from math import ceil

from lxml.html import soupparser, tostring

from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_lang
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html


class Amazon(MetadataSource):

    name = 'Amazon'
    description = _('Downloads metadata from amazon.com')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal & Sengian'
    version = (1, 0, 0)
    has_html_comments = True

    def fetch(self):
        try:
            lang = get_lang()
            lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
            if lang == 'all':
                self.results = search(self.title, self.book_author, self.publisher,
                    self.isbn, max_results=10, verbose=self.verbose, lang='all')
            else:
                tmploc = ThreadwithResults(search, self.title, self.book_author,
                    self.publisher,self.isbn, max_results=5,
                        verbose=self.verbose, lang=lang)
                tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
                    self.publisher, self.isbn, max_results=5,
                        verbose=self.verbose, lang='all')
                tmploc.start()
                tmpnoloc.start()
                tmploc.join()
                tmpnoloc.join()
                tmploc= tmploc.get_result()
                tmpnoloc= tmpnoloc.get_result()

                tempres = None
                if tmpnoloc is not None:
                    tempres = tmpnoloc
                if tmploc is not None:
                    tempres = tmploc
                    if tmpnoloc is not None:
                        tempres.extend(tmpnoloc)
                self.results = tempres
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

class AmazonSocial(MetadataSource):

    name = 'AmazonSocial'
    metadata_type = 'social'
    description = _('Downloads social metadata from amazon.com')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal & Sengian'
    version = (1, 0, 1)
    has_html_comments = True

    def fetch(self):
        if not self.isbn:
            return
        try:
            lang = get_lang()
            lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
            if lang == 'all':
                self.results = get_social_metadata(self.title, self.book_author, self.publisher,
                    self.isbn, verbose=self.verbose, lang='all')[0]
            else:
                tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
                    self.publisher,self.isbn, verbose=self.verbose, lang=lang)
                tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
                    self.publisher, self.isbn, verbose=self.verbose, lang='all')
                tmploc.start()
                tmpnoloc.start()
                tmploc.join()
                tmpnoloc.join()
                tmploc= tmploc.get_result()
                if tmploc is not None:
                    tmploc = tmploc[0]
                tmpnoloc= tmpnoloc.get_result()
                if tmpnoloc is not None:
                    tmpnoloc = tmpnoloc[0]
                if tmpnoloc is not None:
                    if tmploc.rating is None:
                        tmploc.rating = tmpnoloc.rating
                    if tmploc.comments is not None:
                        tmploc.comments = tmpnoloc.comments
                    if tmploc.tags is None:
                        tmploc.tags = tmpnoloc.tags
                self.results = tmploc
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()


def report(verbose):
    if verbose:
        traceback.print_exc()

class AmazonError(Exception):
    pass

class ThreadwithResults(Thread):
    def __init__(self, func, *args, **kargs):
        self.func = func
        self.args = args
        self.kargs = kargs
        self.result = None
        Thread.__init__(self)

    def get_result(self):
        return self.result

    def run(self):
        self.result = self.func(*self.args, **self.kargs)


class Query(object):

    BASE_URL_ALL = 'http://www.amazon.com'
    BASE_URL_FR = 'http://www.amazon.fr'
    BASE_URL_DE = 'http://www.amazon.de'

    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
            max_results=20, rlang='all'):
        assert not(title is None and author is None and publisher is None \
            and isbn is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)
        self.renbres = re.compile(u'\s*([0-9.,]+)\s*')

        q = {   'search-alias' : 'stripbooks' ,
                'unfiltered' : '1',
                'field-keywords' : '',
                'field-author' : '',
                'field-title' : '',
                'field-isbn' : '',
                'field-publisher' : ''
                #get to amazon detailed search page to get all options
                # 'node' : '',
                # 'field-binding' : '',
                #before, during, after
                # 'field-dateop' : '',
                #month as number
                # 'field-datemod' : '',
                # 'field-dateyear' : '',
                #french only
                # 'field-collection' : '',
                #many options available
            }

        if rlang =='all' or rlang =='en':
            q['sort'] = 'relevanceexprank'
            self.urldata = self.BASE_URL_ALL
        # elif rlang =='es':
            # q['sort'] = 'relevanceexprank'
            # q['field-language'] = 'Spanish'
            # self.urldata = self.BASE_URL_ALL
        # elif rlang =='en':
            # q['sort'] = 'relevanceexprank'
            # q['field-language'] = 'English'
            # self.urldata = self.BASE_URL_ALL
        elif rlang =='fr':
            q['sort'] = 'relevancerank'
            self.urldata = self.BASE_URL_FR
        elif rlang =='de':
            q['sort'] = 'relevancerank'
            self.urldata = self.BASE_URL_DE
        self.baseurl = self.urldata

        if title == _('Unknown'):
            title=None
        if author == _('Unknown'):
            author=None

        if isbn is not None:
            q['field-isbn'] = isbn.replace('-', '')
        else:
            if title is not None:
                q['field-title'] = title
            if author is not None:
                q['field-author'] = author
            if publisher is not None:
                q['field-publisher'] = publisher
            if keywords is not None:
                q['field-keywords'] = keywords

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % self.urldata

        try:
            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return None, self.urldata
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise AmazonError(_('Amazon timed out. Try again later.'))
            raise AmazonError(_('Amazon encountered an error.'))
        if '<title>404 - ' in raw:
            return None, self.urldata
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]

        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None, self.urldata

        #nb of page
        try:
            nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
            nbresults = [re.sub(r'[.,]', '', x) for x in nbresults]
        except:
            return None, self.urldata

        pages =[feed]
        if len(nbresults) > 1:
            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
            for i in xrange(2, nbpagetoquery + 1):
                try:
                    urldata = self.urldata + '&page=' + str(i)
                    raw = browser.open_novisit(urldata, timeout=timeout).read()
                except Exception, e:
                    continue
                if '<title>404 - ' in raw:
                    continue
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                        resolve_entities=True)[0]
                try:
                    feed = soupparser.fromstring(raw)
                except:
                    try:
                        #remove ASCII invalid chars
                        return soupparser.fromstring(clean_ascii_chars(raw))
                    except:
                        continue
                pages.append(feed)

        results = []
        for x in pages:
            results.extend([i.getparent().get('href') \
                for i in x.xpath("//a/span[@class='srTitle']")])
        return results[:self.max_results], self.baseurl

class ResultList(object):

    def __init__(self, baseurl, lang = 'all'):
        self.baseurl = baseurl
        self.lang = lang
        self.thread = []
        self.res = []
        self.nbtag = 0
        self.repub = re.compile(u'\((.*)\)')
        self.rerat = re.compile(u'([0-9.]+)')
        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
        self.recom = re.compile(r'(?s)<!--.*?-->')
        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
        self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
                invalid_id = (), invalid_class=()):
        #invalid_tags: remove tag and keep content if False else remove
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def get_title(self, entry):
        title = entry.get_element_by_id('btAsinTitle')
        if title is not None:
            title = title.text
        return unicode(title.replace('\n', '').strip())

    def get_authors(self, entry):
        author = entry.get_element_by_id('btAsinTitle')
        while author.getparent().tag != 'div':
            author = author.getparent()
        author = author.getparent()
        authortext = []
        for x in author.getiterator('a'):
            authortext.append(unicode(x.text_content().strip()))
        return authortext

    def get_description(self, entry, verbose):
        try:
            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
            inv_class = ('seeAll', 'emptyClear')
            inv_tags ={'img': True, 'a': False}
            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
            description = tostring(description, method='html', encoding=unicode).strip()
            # remove all attributes from tags
            description = self.reattr.sub(r'<\1>', description)
            # Remove the notice about text referring to out of print editions
            description = self.reoutp.sub('', description)
            # Remove comments
            description = self.recom.sub('', description)
            return unicode(sanitize_comments_html(description))
        except:
            report(verbose)
            return None

    def get_tags(self, entry, verbose):
        try:
            tags = entry.get_element_by_id('tagContentHolder')
            testptag = tags.find_class('see-all')
            if testptag:
                for x in testptag:
                    alink = x.xpath('descendant-or-self::a')
                    if alink:
                        if alink[0].get('class') == 'tgJsActive':
                            continue
                        return self.baseurl + alink[0].get('href'), True
            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
        except:
            report(verbose)
            tags = [], False
        return tags, False

    def get_book_info(self, entry, mi, verbose):
        try:
            entry = entry.get_element_by_id('SalesRank').getparent()
        except:
            try:
                for z in entry.getiterator('h2'):
                    if self.reprod.search(z.text_content()):
                        entry = z.getparent().find("div[@class='content']/ul")
                        break
            except:
                report(verbose)
                return mi
        elts = entry.findall('li')
        #pub & date
        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
        if elt:
            pub = elt[0].find('b').tail
            mi.publisher = unicode(self.repub.sub('', pub).strip())
            d = self.repub.search(pub)
            if d is not None:
                d = d.group(1)
                try:
                    default = utcnow().replace(day=15)
                    if self.lang != 'all':
                        d = replace_months(d, self.lang)
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        #ISBN
        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
        if elt:
            isbn = elt[0].find('b').tail.replace('-', '').strip()
            if check_isbn(isbn):
                mi.isbn = unicode(isbn)
            elif len(elt) > 1:
                isbnone = elt[1].find('b').tail.replace('-', '').strip()
                if check_isbn(isbnone):
                    mi.isbn = unicode(isbnone)
            else:
                #assume ASIN-> find a check for asin
                mi.isbn = unicode(isbn)
        #Langue
        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
        if elt:
            langue = elt[0].find('b').tail.strip()
            if langue:
                mi.language = unicode(langue)
        #ratings
        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
        if elt:
            ratings = elt[0].find_class('swSprite')
            if ratings:
                ratings = self.rerat.findall(ratings[0].get('title'))
                if len(ratings) == 2:
                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
        return mi

    def fill_MI(self, entry, verbose):
        try:
            title = self.get_title(entry)
            authors = self.get_authors(entry)
        except Exception, e:
            if verbose:
                print _('Failed to get all details for an entry')
                print e
                print _('URL who failed: %s') % x
                report(verbose)
            return None
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        try:
            mi.comments = self.get_description(entry, verbose)
            mi = self.get_book_info(entry, mi, verbose)
        except:
            pass
        return mi

    def get_individual_metadata(self, url, br, verbose):
        try:
            raw = br.open_novisit(url).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return None
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise AmazonError(_('Amazon timed out. Try again later.'))
            raise AmazonError(_('Amazon encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return None
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return None

    def fetchdatathread(self, qbr, qsync, nb, url, verbose):
        try:
            browser = qbr.get(True)
            entry = self.get_individual_metadata(url, browser, verbose)
        except:
            report(verbose)
            entry = None
        finally:
            qbr.put(browser, True)
            qsync.put(nb, True)
        return entry

    def producer(self, sync, urls, br, verbose=False):
        for i in xrange(len(urls)):
            thread = ThreadwithResults(self.fetchdatathread, br, sync,
                        i, urls[i], verbose)
            thread.start()
            self.thread.append(thread)

    def consumer(self, sync, syncbis, br, total_entries, verbose=False):
        i=0
        while i < total_entries:
            nb = int(sync.get(True))
            self.thread[nb].join()
            entry = self.thread[nb].get_result()
            i+=1
            if entry is not None:
                mi = self.fill_MI(entry, verbose)
                if mi is not None:
                    mi.tags, atag = self.get_tags(entry, verbose)
                    self.res[nb] = mi
                    if atag:
                        threadbis = ThreadwithResults(self.fetchdatathread,
                                        br, syncbis, nb, mi.tags, verbose)
                        self.thread[nb] = threadbis
                        self.nbtag +=1
                        threadbis.start()

    def populate(self, entries, ibr, verbose=False, brcall=3):
        br = Queue(brcall)
        cbr = Queue(brcall-1)

        syncp = Queue(1)
        syncc = Queue(len(entries))

        for i in xrange(brcall-1):
            br.put(browser(), True)
            cbr.put(browser(), True)
        br.put(ibr, True)

        self.res = [None]*len(entries)

        prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
        cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
        prod_thread.start()
        cons_thread.start()
        prod_thread.join()
        cons_thread.join()

        #finish processing
        for i in xrange(self.nbtag):
            nb = int(syncc.get(True))
            tags = self.thread[nb].get_result()
            if tags is not None:
                self.res[nb].tags = self.get_tags(tags, verbose)[0]
        return self.res


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    br = browser()
    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)

    if entries is None or len(entries) == 0:
        return None

    #List of entry
    ans = ResultList(baseurl, lang)
    return [x for x in ans.populate(entries, br, verbose) if x is not None]

def get_social_metadata(title, authors, publisher, isbn, verbose=False,
        max_results=1, lang='all'):
    mi = MetaInformation(title, authors)
    if not isbn or not check_isbn(isbn):
        return [mi]

    amazresults = search(isbn=isbn, verbose=verbose,
                max_results=max_results, lang=lang)
    if amazresults is None or amazresults[0] is None:
        from calibre.ebooks.metadata.xisbn import xisbn
        for i in xisbn.get_associated_isbns(isbn):
            amazresults = search(isbn=i, verbose=verbose,
                max_results=max_results, lang=lang)
            if amazresults is not None and amazresults[0] is not None:
                break
        if amazresults is None or amazresults[0] is None:
            return [mi]

    miaz = amazresults[0]
    if miaz.rating is not None:
        mi.rating = miaz.rating
    if miaz.comments is not None:
        mi.comments = miaz.comments
    if miaz.tags is not None:
        mi.tags = miaz.tags
    return [mi]

def option_parser():
    parser = OptionParser(textwrap.dedent(\
    _('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        You can chose the language for metadata retrieval:
        english & french & german
    '''
    )))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-s', '--social', default=0, action='count',
                      help=_('Get social data only'))
    parser.add_option('-m', '--max-results', default=10,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-l', '--lang', default='all',
                      help=_('Chosen language for metadata search (en, fr, de)'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        if opts.social:
            results = get_social_metadata(opts.title, opts.author,
                opts.publisher, opts.isbn, verbose=opts.verbose, lang=opts.lang)
        else:
            results = search(opts.title, opts.author, isbn=opts.isbn,
                publisher=opts.publisher, keywords=opts.keywords, verbose=opts.verbose,
                    max_results=opts.max_results, lang=opts.lang)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None and len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

#test social
# '''Test xisbn'''
# print get_social_metadata('Learning Python', None, None, '8324616489')[0]
# print
# '''Test sophisticated comment formatting'''
# print get_social_metadata('Angels & Demons', None, None, '9781416580829')[0]
# print
# '''Random tests'''
# print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')[0]
# print
# print get_social_metadata('The Great Gatsby', None, None, '0743273567')[0]

if __name__ == '__main__':
    sys.exit(main())
    # import cProfile
    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))

# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
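Taken together, search() and get_social_metadata() are the new module's entry points; a minimal usage sketch (Python 2, like the file; the title and ISBN come from the test comments in the file):

    from calibre.ebooks.metadata.amazon import search, get_social_metadata

    # Full metadata query; returns a list of MetaInformation objects or None.
    results = search(title='Learning Python', max_results=5, verbose=True, lang='all')
    for mi in results or []:
        print unicode(mi)

    # Social metadata (rating, comments, tags) keyed on ISBN; returns [mi].
    print get_social_metadata('Learning Python', None, None, '8324616489')[0]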
@@ -1,653 +0,0 @@

Removed: calibre/ebooks/metadata/amazonbis.py, deleted in its entirety. Its 653 lines match the new amazon.py shown above, apart from two lines in Amazon.fetch(): amazonbis passed max_results=5 where the new file passes 10, and it assigned self.results = tmpres, a typo for tempres that the new file corrects.
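The paired localized/unlocalized queries in the fetch() methods are the file's main concurrency idiom; a stripped-down sketch of the same ThreadwithResults pattern (illustrative values only):

    # Illustrative only: run the same search twice in parallel and prefer
    # the localized result, as Amazon.fetch() does above.
    tmploc = ThreadwithResults(search, 'Some title', None, None, None,
                               max_results=5, lang='fr')
    tmpnoloc = ThreadwithResults(search, 'Some title', None, None, None,
                                 max_results=5, lang='all')
    tmploc.start(); tmpnoloc.start()
    tmploc.join(); tmpnoloc.join()
    results = tmploc.get_result() or tmpnoloc.get_result()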