New google and amazon metadata plugins finished

2025-07-09 03:04:10 -04:00 · 2011-03-16 22:00:45 -06:00 · 2011-03-16 22:00:45 -06:00 · c53f66f752
commit c53f66f752
parent b3a633d3ed
4 changed files with 31 additions and 17 deletions
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -227,6 +227,11 @@ class Metadata(object):
        if val:
            identifiers[typ] = val
    def has_identifier(self, typ):
        '''
        Return True if an identifier of type ``typ`` is present in this
        object's ``identifiers`` mapping, False otherwise.

        :param typ: the identifier type to look up; it is tested for
                    membership exactly as given.
        '''
        # Fetch the backing store through the base ``object`` lookup rather
        # than through normal attribute access on this instance.
        data = object.__getattribute__(self, '_data')
        return typ in data['identifiers']
    # field-oriented interface. Intended to be the same as in LibraryDatabase
    def standard_field_keys(self):
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -22,7 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
-class Worker(Thread):
+class Worker(Thread): # {{{
    '''
    Get book details from Amazon's book page in a separate thread
@ -253,7 +253,7 @@ class Worker(Thread):
                ans = x.tail.strip()
                if ans == 'English':
                    return 'en'
-
+# }}}
 class Amazon(Source):
@ -270,7 +270,7 @@ class Amazon(Source):
            'de' : _('Germany'),
    }
-    def create_query(self, log, title=None, authors=None, identifiers={}):
+    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        domain = self.prefs.get('domain', 'com')
        # See the amazon detailed search page to get all options
@ -313,8 +313,9 @@ class Amazon(Source):
        url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
        return url
    # }}}
-    def identify(self, log, result_queue, abort, title=None, authors=None,
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=20):
        '''
        Note this method will retry without identifiers automatically if no
@ -416,6 +417,7 @@ class Amazon(Source):
                            w.cover_url)
        return None
    # }}}
 if __name__ == '__main__':
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -42,7 +42,7 @@ subject        = XPath('descendant::dc:subject')
 description    = XPath('descendant::dc:description')
 language       = XPath('descendant::dc:language')
-def get_details(browser, url, timeout):
+def get_details(browser, url, timeout): # {{{
    try:
        raw = browser.open_novisit(url, timeout=timeout).read()
    except Exception as e:
@ -54,8 +54,9 @@ def get_details(browser, url, timeout):
        raw = browser.open_novisit(url, timeout=timeout).read()
    return raw
 # }}}
-def to_metadata(browser, log, entry_, timeout):
+def to_metadata(browser, log, entry_, timeout): # {{{
    def get_text(extra, x):
        try:
@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout):
    #mi.language = get_text(extra, language)
    mi.publisher = get_text(extra, publisher)
    # Author sort
    for x in creator(extra):
        for key, val in x.attrib.items():
            if key.endswith('file-as') and val and val.strip():
                mi.author_sort = val
                break
    # ISBN
    isbns = []
    for x in identifier(extra):
@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout):
    return mi
-
+# }}}
 class GoogleBooks(Source):
@ -146,10 +141,10 @@ class GoogleBooks(Source):
    capabilities = frozenset(['identify'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
-        'comments', 'publisher', 'author_sort', 'identifier:isbn',
+        'comments', 'publisher', 'identifier:isbn',
        'identifier:google']) # language currently disabled
-    def create_query(self, log, title=None, authors=None, identifiers={}):
+    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
        isbn = check_isbn(identifiers.get('isbn', None))
        q = ''
@ -177,6 +172,7 @@ class GoogleBooks(Source):
            'start-index':1,
            'min-viewability':'none',
            })
    # }}}
    def cover_url_from_identifiers(self, identifiers):
        goog = identifiers.get('google', None)
@ -209,11 +205,11 @@ class GoogleBooks(Source):
            if abort.is_set():
                break
-    def identify(self, log, result_queue, abort, title=None, authors=None,
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=20):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
-        br = self.browser()
+        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
        except Exception, e:
@ -234,6 +230,7 @@ class GoogleBooks(Source):
        self.get_all_details(br, log, entries, abort, result_queue, timeout)
        return None
    # }}}
 if __name__ == '__main__':
    # To run these tests, use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -102,6 +102,16 @@ def test_identify_plugin(name, tests):
            prints('Log saved to', lf)
            raise SystemExit(1)
    for key in plugin.touched_fields:
        if key.startswith('identifier:'):
            key = key.partition(':')[-1]
            if not match_found.has_identifier(key):
                prints('Failed to find identifier:', key)
                raise SystemExit(1)
        elif match_found.is_null(key):
            prints('Failed to find', key)
            raise SystemExit(1)
    prints('Average time per query', sum(times)/len(times))
    if os.stat(lf).st_size > 10: