Kovid Goyal 2011-08-18 16:55:35 -06:00
parent 65a2931f68
commit 9076fe4a13


@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
from xml.etree.ElementTree import _Element
__license__ = 'GPL 3'
__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
@ -12,10 +11,8 @@ import datetime
from urllib import quote_plus
from Queue import Queue, Empty
from lxml import etree, html
from lxml.etree import ElementBase
from calibre import as_unicode
from calibre import prints
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import check_isbn
@ -27,16 +24,16 @@ class Ozon(Source):
description = _('Downloads metadata and covers from OZON.ru')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
'publisher', 'pubdate', 'comments', 'series', 'rating', 'language'])
# For test purposes only; the test function does not like it when some fields happen to be empty
#touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
# 'publisher', 'pubdate', 'comments'])
supports_gzip_transfer_encoding = True
has_html_comments = True
ozon_url = 'http://www.ozon.ru'
# match any ISBN10/13. From "Regular Expressions Cookbook"
@ -53,11 +50,11 @@ class Ozon(Source):
res = ('ozon', ozon_id, url)
return res
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
# div_book -> search only books, ebooks and audio books
search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
isbn = _format_isbn(log, identifiers.get('isbn', None))
# TODO: format isbn!
qItems = set([isbn, title])
@ -66,7 +63,7 @@ class Ozon(Source):
qItems.discard(None)
qItems.discard('')
qItems = map(_quoteString, qItems)
q = ' '.join(qItems).strip()
log.info(u'search string: ' + q)
@ -74,10 +71,10 @@ class Ozon(Source):
q = q.encode('utf-8')
if not q:
return None
search_url += quote_plus(q)
log.debug(u'search url: %r'%search_url)
return search_url
# }}}
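# Illustrative example (not part of the original code): with
# identifiers={'isbn': '9785916572629'} and title=u'Мертвые души' the query
# text becomes something like  9785916572629 "Мертвые души"  (multi-word items
# are wrapped in quotes by _quoteString), and the whole string is UTF-8
# encoded and percent-escaped by quote_plus() before being appended to
# .../SearchWebService?searchContext=div_book&searchText=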
@ -93,11 +90,11 @@ class Ozon(Source):
try:
raw = self.browser.open_novisit(query).read()
except Exception as e:
log.exception(u'Failed to make identify query: %r'%query)
return as_unicode(e)
try:
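# recover=True lets lxml keep parsing slightly malformed XML instead of
# raising, and no_network=True stops the parser from fetching any external
# resources while doing so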
parser = etree.XMLParser(recover=True, no_network=True)
feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
@ -110,14 +107,14 @@ class Ozon(Source):
return as_unicode(e)
# }}}
def get_metadata(self, log, entries, title, authors, identifiers): # {{{
title = unicode(title).upper() if title else ''
authors = map(unicode.upper, map(unicode, authors)) if authors else None
ozon_id = identifiers.get('ozon', None)
unk = unicode(_('Unknown')).upper()
if title == unk:
title = None
@ -129,7 +126,7 @@ class Ozon(Source):
for miauthor in miauthors:
if author in miauthor: return True
return None
def ensure_metadata_match(mi): # {{{
match = True
if title:
@ -138,13 +135,13 @@ class Ozon(Source):
if match and authors:
miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
match = in_authors(authors, miauthors)
if match and ozon_id:
mozon_id = mi.identifiers['ozon']
match = ozon_id == mozon_id
return match
metadata = []
for i, entry in enumerate(entries):
mi = self.to_metadata(log, entry)
@ -159,64 +156,64 @@ class Ozon(Source):
def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout): # {{{
req_isbn = identifiers.get('isbn', None)
for mi in metadata:
if abort.is_set():
break
try:
ozon_id = mi.identifiers['ozon']
try:
self.get_book_details(log, mi, timeout)
except:
log.exception(u'Failed to get details for metadata: %s'%mi.title)
all_isbns = getattr(mi, 'all_isbns', [])
if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
log.debug(u'skipped, no requested ISBN %s found'%req_isbn)
continue
for isbn in all_isbns:
self.cache_isbn_to_identifier(isbn, ozon_id)
if mi.ozon_cover_url:
self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)
self.clean_downloaded_metadata(mi)
result_queue.put(mi)
except:
log.exception(u'Failed to get details for metadata: %s'%mi.title)
# }}}
def to_metadata(self, log, entry): # {{{
xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
title = entry.xpath(xp_template.format('Name'))
author = entry.xpath(xp_template.format('Author'))
mi = Metadata(title, author.split(','))
ozon_id = entry.xpath(xp_template.format('ID'))
mi.identifiers = {'ozon':ozon_id}
mi.comments = entry.xpath(xp_template.format('Annotation'))
mi.ozon_cover_url = None
cover = entry.xpath(xp_template.format('Picture'))
if cover:
mi.ozon_cover_url = _translateToBigCoverUrl(cover)
rating = entry.xpath(xp_template.format('ClientRatingValue'))
if rating:
try:
# 'rating': a floating point number between 0 and 10
# OZON rates out of 5, calibre out of 10, but there seems to be a bug in identify
mi.rating = float(rating)
except:
pass
return mi
# }}}
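# Rough shape of a single search result entry, inferred from the xpath
# expressions in to_metadata() above (the child element names come from the
# code; the wrapper element and namespace are assumptions):
#   <SearchItem xmlns="...">
#     <ID>1002986468</ID>
#     <Name>...</Name>
#     <Author>...</Author>
#     <Annotation>...</Annotation>
#     <Picture>http://www.ozon.ru/multimedia/books_covers/small/1002986468.gif</Picture>
#     <ClientRatingValue>4.5</ClientRatingValue>
#   </SearchItem>
# local-name() is used in xp_template precisely so the lookups work no matter
# what namespace the feed declares.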
def get_cached_cover_url(self, identifiers): # {{{
url = None
ozon_id = identifiers.get('ozon', None)
@ -248,14 +245,14 @@ class Ozon(Source):
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found')
return
if abort.is_set():
return
log.debug('Downloading cover from:', cached_url)
try:
cdata = self.browser.open_novisit(cached_url, timeout=timeout).read()
@ -265,10 +262,10 @@ class Ozon(Source):
log.exception(u'Failed to download cover from: %s'%cached_url)
return as_unicode(e)
# }}}
def get_book_details(self, log, metadata, timeout): # {{{
url = self.get_book_url(metadata.get_identifiers())[2]
raw = self.browser.open_novisit(url, timeout=timeout).read()
doc = html.fromstring(raw)
@ -298,14 +295,14 @@ class Ozon(Source):
if matcher:
year = int(matcher.group(0))
# only the year is available, so use the 1st of Jan
metadata.pubdate = datetime.datetime(year, 1, 1) #<- failed comparison in identify.py
#metadata.pubdate = datetime(year, 1, 1)
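# "Язык" is Russian for "Language"; the xpath below takes the text that
# follows the "Язык: " label next to the publisher entry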
xpt = u'substring-after(string(../text()[contains(., "Язык")]), ": ")'
displLang = publishers[0].xpath(xpt)
lang_code =_translageLanguageToCode(displLang)
if lang_code:
metadata.language = lang_code
# overwrite comments from HTML if any
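# "От издателя" is Russian for "From the publisher", the label of the block
# that holds the fuller HTML description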
# tr/td[contains(.//text(), "От издателя")] -> does not work, why?
xpt = u'//div[contains(@class, "detail")]//tr/td//text()[contains(., "От издателя")]'\
@ -323,14 +320,14 @@ class Ozon(Source):
# }}}
def _quoteString(str): # {{{
return '"' + str + '"' if str and str.find(' ') != -1 else str
return '"' + str + '"' if str and str.find(' ') != -1 else str
# }}}
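# e.g. (illustrative) _quoteString(u'Война и мир') -> u'"Война и мир"',
#      _quoteString('9785916572629') -> '9785916572629'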
# TODO: make customizable
def _translateToBigCoverUrl(coverUrl): # {{{
# http://www.ozon.ru/multimedia/books_covers/small/1002986468.gif
# http://www.ozon.ru/multimedia/books_covers/1002986468.jpg
m = re.match(r'^(.+\/)small\/(.+\.).+$', coverUrl)
if m:
coverUrl = m.group(1) + m.group(2) + 'jpg'
@ -339,12 +336,12 @@ def _translateToBigCoverUrl(coverUrl): # {{{
def _get_affiliateId(): # {{{
import random
aff_id = 'romuk'
# Use Kovid's affiliate id 30% of the time.
if random.randint(1, 10) in (1, 2, 3):
aff_id = 'kovidgoyal'
return aff_id
# }}}
# for now only Russian (RUS) ISBNs are supported
@ -387,10 +384,10 @@ def _format_isbn(log, isbn): # {{{
def _translageLanguageToCode(displayLang): # {{{
displayLang = unicode(displayLang).strip() if displayLang else None
langTbl = { None: 'ru',
u'Немецкий': 'de',
u'Английский': 'en',
u'Французский': 'fr',
u'Итальянский': 'it',
u'Испанский': 'es',
u'Китайский': 'zh',
u'Японский': 'ja' }
@ -406,7 +403,7 @@ if __name__ == '__main__': # tests {{{
test_identify_plugin(Ozon.name,
[
(
{'identifiers':{'isbn': '9785916572629'} },
[title_test(u'На все четыре стороны', exact=True),
@ -442,4 +439,4 @@ if __name__ == '__main__': # tests {{{
[title_test(u'Метро', exact=False)]
),
])
# }}}