Amazon metadata download: Ignore Spanish edition entries when searching for a book on amazon.com

2025-07-09 03:04:10 -04:00 · 2013-04-09 17:07:24 +05:30 · 2013-04-09 17:07:24 +05:30 · 949b3c04f9
commit 949b3c04f9
parent 9fb122cd4b
1 changed files with 60 additions and 58 deletions
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -276,7 +276,6 @@ class Worker(Thread): # Get details {{{
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []
        if not title or not authors or not asin:
            self.log.error('Could not find title/authors/asin for %r'%self.url)
            self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
@@ -431,7 +430,6 @@ class Worker(Thread): # Get details {{{
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
        return sanitize_comments_html(desc)
    def parse_comments(self, root):
        ans = ''
        desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
@@ -637,7 +635,6 @@ class Amazon(Source):
            mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)
    def create_query(self, log, title=None, authors=None, identifiers={},  # {{{
            domain=None):
        if domain is None:
@@ -724,7 +721,10 @@ class Amazon(Source):
        def title_ok(title):
            title = title.lower()
-            for x in ('bulk pack', '[audiobook]', '[audio cd]'):
+            bad = ['bulk pack', '[audiobook]', '[audio cd]']
+            if self.domain == 'com':
+                bad.append('(spanish edition)')
+            for x in bad:
                if x in title:
                    return False
            return True
@@ -751,7 +751,6 @@ class Amazon(Source):
                        matches.append(a.get('href'))
                    break
        # Keep only the top 5 matches as the matches are sorted by relevance by
        # Amazon so lower matches are not likely to be very relevant
        return matches[:5]
@@ -795,7 +794,6 @@ class Amazon(Source):
                log.exception(msg)
            return as_unicode(msg)
        raw = clean_ascii_chars(xml_to_unicode(raw,
            strip_encoding_pats=True, resolve_entities=True)[0])
@@ -825,7 +823,6 @@ class Amazon(Source):
                    # The error is almost always a not found error
                    found = False
        if found:
            matches = self.parse_results_page(root)
@@ -907,6 +904,11 @@ if __name__ == '__main__': # tests {{{
            isbn_test, title_test, authors_test, comments_test, series_test)
    com_tests = [  # {{{
+            (  # Has a spanish edition
+             {'title':'11/22/63'},
+             [title_test('11/22/63: A Novel', exact=True), authors_test(['Stephen King']),]
+             ),
            (  # + in title and uses id="main-image" for cover
             {'title':'C++ Concurrency in Action'},
             [title_test('C++ Concurrency in Action: Practical Multithreading',
@@ -917,8 +919,8 @@ if __name__ == '__main__': # tests {{{
            (  # Series
                {'identifiers':{'amazon':'0756407117'}},
                [title_test(
-                "Throne of the Crescent Moon"
+                "Throne of the Crescent Moon",
-                , exact=True), series_test('Crescent Moon Kingdoms', 1),
+                exact=True), series_test('Crescent Moon Kingdoms', 1),
                comments_test('Makhslood'),
                ]
            ),
@@ -926,8 +928,8 @@ if __name__ == '__main__': # tests {{{
            (  # Different comments markup, using Book Description section
                {'identifiers':{'amazon':'0982514506'}},
                [title_test(
-                "Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy"
+                "Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy",
-                , exact=True),
+                exact=True),
                comments_test('Jelena'), comments_test('Leslie'),
                ]
            ),