From eff4d0b72e29aba2e22dad58826ed624db1d9a19 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 8 Sep 2020 08:20:32 +0530
Subject: [PATCH] Remove some metadata sources

These are in languages I cannot read, therefore too much effort for me
to maintain. And no one else seems to be willing to maintain them to
the standard I expect for calibre code, over the long term. If someone
does intend to maintain them, they should become third party plugins.

Fixes #1222 (Correct imports for OZON.ru plugin to be able to run it using Python 3.x)

Fixes #1894751 [In Python 3.0, the HTMLParser module has been renamed to html.parser](https://bugs.launchpad.net/calibre/+bug/1894751)
---
 src/calibre/ebooks/metadata/sources/douban.py | 392 ----------
 src/calibre/ebooks/metadata/sources/ozon.py   | 715 ------------------
 2 files changed, 1107 deletions(-)
 delete mode 100644 src/calibre/ebooks/metadata/sources/douban.py
 delete mode 100644 src/calibre/ebooks/metadata/sources/ozon.py

diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py
deleted file mode 100644
index 63de45c44a..0000000000
--- a/src/calibre/ebooks/metadata/sources/douban.py
+++ /dev/null
@@ -1,392 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-__license__ = 'GPL v3'
-__copyright__ = '2011, Kovid Goyal ; 2011, Li Fanxi '
-__docformat__ = 'restructuredtext en'
-
-import time
-try:
-    from queue import Empty, Queue
-except ImportError:
-    from Queue import Empty, Queue
-
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Option, Source
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre import as_unicode
-
-NAMESPACES = {
-    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
-    'atom': 'http://www.w3.org/2005/Atom',
-    'db': 'https://www.douban.com/xmlns/',
-    'gd': 'http://schemas.google.com/g/2005'
-}
-
-
-def get_details(browser, url, timeout):  # {{{
-    try:
-        if Douban.DOUBAN_API_KEY:
-            url = url + "?apikey=" + Douban.DOUBAN_API_KEY
-        raw = browser.open_novisit(url, timeout=timeout).read()
-    except Exception as e:
-        gc = getattr(e, 'getcode', lambda: -1)
-        if gc() != 403:
-            raise
-        # Douban is throttling us, wait a little
-        time.sleep(2)
-        raw = browser.open_novisit(url, timeout=timeout).read()
-
-    return raw
-
-
-# }}}
-
-
-class Douban(Source):
-
-    name = 'Douban Books'
-    author = 'Li Fanxi, xcffl, jnozsc'
-    version = (3, 1, 0)
-    minimum_calibre_version = (2, 80, 0)
-
-    description = _(
-        'Downloads metadata and covers from Douban.com. '
-        'Useful only for Chinese language books.'
-    )
-
-    capabilities = frozenset(['identify', 'cover'])
-    touched_fields = frozenset([
-        'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
-        'identifier:isbn', 'rating', 'identifier:douban'
-    ])  # language currently disabled
-    supports_gzip_transfer_encoding = True
-    cached_cover_url_is_reliable = True
-
-    DOUBAN_API_KEY = '054022eaeae0b00e0fc068c0c0a2102a'
-    DOUBAN_API_URL = 'https://api.douban.com/v2/book/search'
-    DOUBAN_BOOK_URL = 'https://book.douban.com/subject/%s/'
-
-    options = (
-        Option(
-            'include_subtitle_in_title', 'bool', True,
-            _('Include subtitle in book title:'),
-            _('Whether to append subtitle in the book title.')
-        ),
-    )
-
-    def to_metadata(self, browser, log, entry_, timeout):  # {{{
-        from calibre.utils.date import parse_date, utcnow
-
-        douban_id = entry_.get('id')
-        title = entry_.get('title')
-        description = entry_.get('summary')
-        # subtitle = entry_.get('subtitle')  # TODO: std metada doesn't have this field
-        publisher = entry_.get('publisher')
-        isbn = entry_.get('isbn13')  # ISBN11 is obsolute, use ISBN13
-        pubdate = entry_.get('pubdate')
-        authors = entry_.get('author')
-        book_tags = entry_.get('tags')
-        rating = entry_.get('rating')
-        cover_url = entry_.get('images', {}).get('large')
-        series = entry_.get('series')
-
-        if not authors:
-            authors = [_('Unknown')]
-        if not douban_id or not title:
-            # Silently discard this entry
-            return None
-
-        mi = Metadata(title, authors)
-        mi.identifiers = {'douban': douban_id}
-        mi.publisher = publisher
-        mi.comments = description
-        # mi.subtitle = subtitle
-
-        # ISBN
-        isbns = []
-        if isinstance(isbn, (type(''), bytes)):
-            if check_isbn(isbn):
-                isbns.append(isbn)
-        else:
-            for x in isbn:
-                if check_isbn(x):
-                    isbns.append(x)
-        if isbns:
-            mi.isbn = sorted(isbns, key=len)[-1]
-        mi.all_isbns = isbns
-
-        # Tags
-        mi.tags = [tag['name'] for tag in book_tags]
-
-        # pubdate
-        if pubdate:
-            try:
-                default = utcnow().replace(day=15)
-                mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
-            except:
-                log.error('Failed to parse pubdate %r' % pubdate)
-
-        # Ratings
-        if rating:
-            try:
-                mi.rating = float(rating['average']) / 2.0
-            except:
-                log.exception('Failed to parse rating')
-                mi.rating = 0
-
-        # Cover
-        mi.has_douban_cover = None
-        u = cover_url
-        if u:
-            # If URL contains "book-default", the book doesn't have a cover
-            if u.find('book-default') == -1:
-                mi.has_douban_cover = u
-
-        # Series
-        if series:
-            mi.series = series['title']
-
-        return mi
-
-    # }}}
-
-    def get_book_url(self, identifiers):  # {{{
-        db = identifiers.get('douban', None)
-        if db is not None:
-            return ('douban', db, self.DOUBAN_BOOK_URL % db)
-
-    # }}}
-
-    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
-        try:
-            from urllib.parse import urlencode
-        except ImportError:
-            from urllib import urlencode
-        SEARCH_URL = 'https://api.douban.com/v2/book/search?count=10&'
-        ISBN_URL = 'https://api.douban.com/v2/book/isbn/'
-        SUBJECT_URL = 'https://api.douban.com/v2/book/'
-
-        q = ''
-        t = None
-        isbn = check_isbn(identifiers.get('isbn', None))
-        subject = identifiers.get('douban', None)
-        if isbn is not None:
-            q = isbn
-            t = 'isbn'
-        elif subject is not None:
-            q = subject
-            t = 'subject'
-        elif title or authors:
-
-            def build_term(prefix, parts):
-                return ' '.join(x for x in parts)
-
-            title_tokens = list(self.get_title_tokens(title))
-            if title_tokens:
-                q += build_term('title', title_tokens)
-            author_tokens = list(
-                self.get_author_tokens(authors, only_first_author=True)
-            )
-            if author_tokens:
-                q += ((' ' if q != '' else '') + build_term('author', author_tokens))
-            t = 'search'
-        q = q.strip()
-        if isinstance(q, type(u'')):
-            q = q.encode('utf-8')
-        if not q:
-            return None
-        url = None
-        if t == "isbn":
-            url = ISBN_URL + q
-        elif t == 'subject':
-            url = SUBJECT_URL + q
-        else:
-            url = SEARCH_URL + urlencode({
-                'q': q,
-            })
-        if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
-            if t == "isbn" or t == "subject":
-                url = url + "?apikey=" + self.DOUBAN_API_KEY
-            else:
-                url = url + "&apikey=" + self.DOUBAN_API_KEY
-        return url
-
-    # }}}
-
-    def download_cover(
-        self,
-        log,
-        result_queue,
-        abort,  # {{{
-        title=None,
-        authors=None,
-        identifiers={},
-        timeout=30,
-        get_best_cover=False
-    ):
-        cached_url = self.get_cached_cover_url(identifiers)
-        if cached_url is None:
-            log.info('No cached cover found, running identify')
-            rq = Queue()
-            self.identify(
-                log,
-                rq,
-                abort,
-                title=title,
-                authors=authors,
-                identifiers=identifiers
-            )
-            if abort.is_set():
-                return
-            results = []
-            while True:
-                try:
-                    results.append(rq.get_nowait())
-                except Empty:
-                    break
-            results.sort(
-                key=self.identify_results_keygen(
-                    title=title, authors=authors, identifiers=identifiers
-                )
-            )
-            for mi in results:
-                cached_url = self.get_cached_cover_url(mi.identifiers)
-                if cached_url is not None:
-                    break
-            if cached_url is None:
-                log.info('No cover found')
-                return
-
-        if abort.is_set():
-            return
-        br = self.browser
-        log('Downloading cover from:', cached_url)
-        try:
-            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            if cdata:
-                result_queue.put((self, cdata))
-        except:
-            log.exception('Failed to download cover from:', cached_url)
-
-    # }}}
-
-    def get_cached_cover_url(self, identifiers):  # {{{
-        url = None
-        db = identifiers.get('douban', None)
-        if db is None:
-            isbn = identifiers.get('isbn', None)
-            if isbn is not None:
-                db = self.cached_isbn_to_identifier(isbn)
-        if db is not None:
-            url = self.cached_identifier_to_cover_url(db)
-
-        return url
-
-    # }}}
-
-    def get_all_details(
-        self,
-        br,
-        log,
-        entries,
-        abort,  # {{{
-        result_queue,
-        timeout
-    ):
-        for relevance, i in enumerate(entries):
-            try:
-                ans = self.to_metadata(br, log, i, timeout)
-                if isinstance(ans, Metadata):
-                    ans.source_relevance = relevance
-                    db = ans.identifiers['douban']
-                    for isbn in getattr(ans, 'all_isbns', []):
-                        self.cache_isbn_to_identifier(isbn, db)
-                    if ans.has_douban_cover:
-                        self.cache_identifier_to_cover_url(db, ans.has_douban_cover)
-                    self.clean_downloaded_metadata(ans)
-                    result_queue.put(ans)
-            except:
-                log.exception('Failed to get metadata for identify entry:', i)
-            if abort.is_set():
-                break
-
-    # }}}
-
-    def identify(
-        self,
-        log,
-        result_queue,
-        abort,
-        title=None,
-        authors=None,  # {{{
-        identifiers={},
-        timeout=30
-    ):
-        import json
-
-        query = self.create_query(
-            log, title=title, authors=authors, identifiers=identifiers
-        )
-        if not query:
-            log.error('Insufficient metadata to construct query')
-            return
-        br = self.browser
-        try:
-            raw = br.open_novisit(query, timeout=timeout).read()
-        except Exception as e:
-            log.exception('Failed to make identify query: %r' % query)
-            return as_unicode(e)
-        try:
-            j = json.loads(raw)
-        except Exception as e:
-            log.exception('Failed to parse identify results')
-            return as_unicode(e)
-        if 'books' in j:
-            entries = j['books']
-        else:
-            entries = []
-            entries.append(j)
-        if not entries and identifiers and title and authors and \
-                not abort.is_set():
-            return self.identify(
-                log,
-                result_queue,
-                abort,
-                title=title,
-                authors=authors,
-                timeout=timeout
-            )
-        # There is no point running these queries in threads as douban
-        # throttles requests returning 403 Forbidden errors
-        self.get_all_details(br, log, entries, abort, result_queue, timeout)
-
-        return None
-
-    # }}}
-
-
-if __name__ == '__main__':  # tests {{{
-    # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
-    from calibre.ebooks.metadata.sources.test import (
-        test_identify_plugin, title_test, authors_test
-    )
-    test_identify_plugin(
-        Douban.name, [
-            ({
-                'identifiers': {
-                    'isbn': '9787536692930'
-                },
-                'title': '三体',
-                'authors': ['刘慈欣']
-            }, [title_test('三体', exact=True),
-                authors_test(['刘慈欣'])]),
-            ({
-                'title': 'Linux内核修炼之道',
-                'authors': ['任桥伟']
-            }, [title_test('Linux内核修炼之道', exact=False)]),
-        ]
-    )
-# }}}
diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py
deleted file mode 100644
index 271423715a..0000000000
--- a/src/calibre/ebooks/metadata/sources/ozon.py
+++ /dev/null
@@ -1,715 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-__license__ = 'GPL 3'
-__copyright__ = '2011-2013 Roman Mukhin '
-__docformat__ = 'restructuredtext en'
-
-# To ensure bugfix and development of this metadata source please donate
-# bitcoins to 1E6CRSLY1uNstcZjLYZBHRVs1CPKbdi4ep
-
-import re
-try:
-    from queue import Empty, Queue
-except ImportError:
-    from Queue import Empty, Queue
-
-from calibre import as_unicode, replace_entities
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source, Option
-from calibre.ebooks.metadata.book.base import Metadata
-
-
-class Ozon(Source):
-    name = 'OZON.ru'
-    minimum_calibre_version = (2, 80, 0)
-    version = (1, 1, 0)
-    description = _('Downloads metadata and covers from OZON.ru (updated)')
-
-    capabilities = frozenset(['identify', 'cover'])
-
-    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
-                                'publisher', 'pubdate', 'comments', 'series', 'rating', 'languages'])
-    # Test purpose only, test function does not like when sometimes some filed are empty
-    # touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
-    #                             'publisher', 'pubdate', 'comments'])
-
-    supports_gzip_transfer_encoding = True
-    has_html_comments = True
-
-    ozon_url = 'https://www.ozon.ru'
-
-    # match any ISBN10/13. From "Regular Expressions Cookbook"
-    isbnPattern = r'(?:ISBN(?:-1[03])?:? )?(?=[-0-9 ]{17}|' \
-                  '[-0-9X ]{13}|[0-9X]{10})(?:97[89][- ]?)?[0-9]{1,5}[- ]?' \
-                  '(?:[0-9]+[- ]?){2}[0-9X]'
-    isbnRegex = re.compile(isbnPattern)
-
-    optkey_strictmatch = 'strict_result_match'
-    options = (
-        Option(optkey_strictmatch, 'bool', False,
-               _('Filter out less relevant hits from the search results'),
-               _('Improve search result by removing less relevant hits. It can be useful to refine the search when there are many matches')),
-    )
-
-    def get_book_url(self, identifiers):  # {{{
-        try:
-            from urllib.parse import quote
-        except ImportError:
-            from urllib import quote
-        ozon_id = identifiers.get('ozon', None)
-        res = None
-        if ozon_id:
-            # no affiliateId is used in search/detail
-            url = '{}/context/detail/id/{}'.format(self.ozon_url, quote(ozon_id))
-            res = ('ozon', ozon_id, url)
-        return res
-
-    # }}}
-
-    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
-        from urllib import quote_plus
-
-        # div_book -> search only books, ebooks and audio books
-        search_url = self.ozon_url + '/?context=search&group=div_book&text='
-
-        # for ozon.ru search we have to format ISBN with '-'
-        isbn = _format_isbn(log, identifiers.get('isbn', None))
-        if isbn and '-' not in isbn:
-            log.error(
-                "%s requires formatted ISBN for search. %s cannot be formated - removed. (only Russian ISBN format is supported now)"
-                % (self.name, isbn))
-            isbn = None
-
-        ozonid = identifiers.get('ozon', None)
-
-        qItems = {ozonid, isbn}
-
-        # Added Russian variant of 'Unknown'
-        unk = [_('Unknown').upper(), 'Неизв.'.upper(), icu_upper('Неизв.')]
-
-        # use only ozonid if specified otherwise ozon.ru does not like a combination
-        if not ozonid:
-            if title and title not in unk:
-                qItems.add(title)
-
-            if authors:
-                for auth in authors:
-                    if icu_upper(auth) not in unk:
-                        qItems.add(auth)
-
-        qItems.discard(None)
-        qItems.discard('')
-        searchText = u' '.join(qItems).strip()
-
-        if isinstance(searchText, type(u'')):
-            searchText = searchText.encode('utf-8')
-        if not searchText:
-            return None
-
-        search_url += quote_plus(searchText)
-        log.debug(u'search url: %s' % search_url)
-        return search_url
-
-    # }}}
-
-    def identify(self, log, result_queue, abort, title=None, authors=None,
-                 identifiers={}, timeout=90):  # {{{
-        from calibre.ebooks.chardet import xml_to_unicode
-        from HTMLParser import HTMLParser
-        from lxml import etree, html
-        import json
-
-        if not self.is_configured():
-            return
-        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
-        if not query:
-            err = u'Insufficient metadata to construct query'
-            log.error(err)
-            return err
-
-        try:
-            raw = self.browser.open_novisit(query).read()
-        except Exception as e:
-            log.exception(u'Failed to make identify query: %r' % query)
-            return as_unicode(e)
-
-        try:
-            doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
-            entries_block = doc.xpath(u'//div[@class="bSearchResult"]')
-
-            # log.debug(u'HTML: %s' % xml_to_unicode(raw, verbose=True)[0])
-
-            if entries_block:
-                entries = doc.xpath(u'//div[contains(@itemprop, "itemListElement")]')
-                # log.debug(u'entries_block')
-                # for entry in entries:
-                #     log.debug('entries %s' % entree.tostring(entry))
-                metadata = self.get_metadata(log, entries, title, authors, identifiers)
-                self.get_all_details(log, metadata, abort, result_queue, identifiers, timeout)
-            else:
-                # Redirect page: trying to extract ozon_id from javascript data
-                h = HTMLParser()
-                entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode')))
-                json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
-                json_info = re.search(json_pat, entry_string)
-                jsondata = json_info.group(1) if json_info else None
-                if jsondata:
-                    idx = jsondata.rfind('}]')
-                    if idx > 0:
-                        jsondata = jsondata[:idx + 2]
-
-                # log.debug(u'jsondata: %s' % jsondata)
-                dataLayer = json.loads(jsondata) if jsondata else None
-
-                ozon_id = None
-                if dataLayer and dataLayer[0] and 'ecommerce' in dataLayer[0]:
-                    jsproduct = dataLayer[0]['ecommerce']['detail']['products'][0]
-                    ozon_id = as_unicode(jsproduct['id'])
-                    entry_title = as_unicode(jsproduct['name'])
-
-                    log.debug(u'ozon_id %s' % ozon_id)
-                    log.debug(u'entry_title %s' % entry_title)
-
-                    if ozon_id:
-                        metadata = self.to_metadata_for_single_entry(log, ozon_id, entry_title, authors)
-                        identifiers['ozon'] = ozon_id
-                        self.get_all_details(log, [metadata], abort, result_queue, identifiers, timeout, cachedPagesDict={})
-
-                if not ozon_id:
-                    log.error('No SearchResults in Ozon.ru response found!')
-
-        except Exception as e:
-            log.exception('Failed to parse identify results')
-            return as_unicode(e)
-
-    # }}}
-
-    def to_metadata_for_single_entry(self, log, ozon_id, title, authors):  # {{{
-
-        # parsing javascript data from the redirect page
-        mi = Metadata(title, authors)
-        mi.identifiers = {'ozon': ozon_id}
-
-        return mi
-
-    # }}}
-
-    def get_metadata(self, log, entries, title, authors, identifiers):  # {{{
-        # some book titles have extra characters like this
-
-        reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
-
-        title = type(u'')(title).upper() if title else ''
-        if reRemoveFromTitle:
-            title = reRemoveFromTitle.sub('', title)
-        authors = [
-            _normalizeAuthorNameWithInitials(type(u'')(a).upper()) for a in authors
-        ] if authors else None
-
-        ozon_id = identifiers.get('ozon', None)
-        # log.debug(u'ozonid: ', ozon_id)
-
-        unk = type(u'')(_('Unknown')).upper()
-
-        if title == unk:
-            title = None
-
-        if authors == [unk] or authors == []:
-            authors = None
-
-        def in_authors(authors, miauthors):
-            for author in authors:
-                for miauthor in miauthors:
-                    # log.debug(u'=> %s <> %s'%(author, miauthor))
-                    if author in miauthor:
-                        return True
-            return None
-
-        def calc_source_relevance(mi):  # {{{
-            relevance = 0
-            if title:
-                mititle = type(u'')(mi.title).upper() if mi.title else ''
-
-                if reRemoveFromTitle:
-                    mititle = reRemoveFromTitle.sub('', mititle)
-
-                if title in mititle:
-                    relevance += 3
-                elif mititle:
-                    # log.debug(u'!!%s!'%mititle)
-                    relevance -= 3
-            else:
-                relevance += 1
-
-            if authors:
-                miauthors = [type(u'')(a).upper() for a in mi.authors or ()]
-                # log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
-
-                if (in_authors(authors, miauthors)):
-                    relevance += 3
-                elif u''.join(miauthors):
-                    # log.debug(u'!%s!'%u'|'.join(miauthors))
-                    relevance -= 3
-            else:
-                relevance += 1
-
-            if ozon_id:
-                mozon_id = mi.identifiers['ozon']
-                if ozon_id == mozon_id:
-                    relevance += 100
-
-            if relevance < 0:
-                relevance = 0
-            return relevance
-
-        # }}}
-
-        strict_match = self.prefs[self.optkey_strictmatch]
-        metadata = []
-        for entry in entries:
-
-            mi = self.to_metadata(log, entry)
-            relevance = calc_source_relevance(mi)
-            # TODO findout which is really used
-            mi.source_relevance = relevance
-            mi.relevance_in_source = relevance
-
-            if not strict_match or relevance > 0:
-                # getting rid of a random book that shows up in results
-                if not (mi.title == 'Unknown'):
-                    metadata.append(mi)
-                    # log.debug(u'added metadata %s %s.'%(mi.title, mi.authors))
-            else:
-                log.debug(u'skipped metadata title: %s, authors: %s. (does not match the query - relevance score: %s)'
-                          % (mi.title, u' '.join(mi.authors), relevance))
-        return metadata
-
-    # }}}
-
-    def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout, cachedPagesDict={}):  # {{{
-
-        req_isbn = identifiers.get('isbn', None)
-
-        for mi in metadata:
-            if abort.is_set():
-                break
-            try:
-                ozon_id = mi.identifiers['ozon']
-
-                try:
-                    self.get_book_details(log, mi, timeout, cachedPagesDict[
-                        ozon_id] if cachedPagesDict and ozon_id in cachedPagesDict else None)
-                except:
-                    log.exception(u'Failed to get details for metadata: %s' % mi.title)
-
-                all_isbns = getattr(mi, 'all_isbns', [])
-                if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
-                    log.debug(u'skipped, no requested ISBN %s found' % req_isbn)
-                    continue
-
-                for isbn in all_isbns:
-                    self.cache_isbn_to_identifier(isbn, ozon_id)
-
-                if mi.ozon_cover_url:
-                    self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)
-
-                self.clean_downloaded_metadata(mi)
-                result_queue.put(mi)
-
-            except:
-                log.exception(u'Failed to get details for metadata: %s' % mi.title)
-
-    # }}}
-
-    def to_metadata(self, log, entry):  # {{{
-        title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
-        # log.debug(u'Title: -----> %s' % title)
-
-        author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
-        # log.debug(u'Author: -----> %s' % author)
-
-        norm_authors = [_normalizeAuthorNameWithInitials(a.strip()) for a in type(u'')(author).split(u',')]
-        mi = Metadata(title, norm_authors)
-
-        ozon_id = entry.get('data-href').split('/')[-2]
-
-        if ozon_id:
-            mi.identifiers = {'ozon': ozon_id}
-            # log.debug(u'ozon_id: -----> %s' % ozon_id)
-
-        mi.ozon_cover_url = None
-        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
-        log.debug(u'cover: -----> %s' % cover)
-        if cover:
-            mi.ozon_cover_url = _translateToBigCoverUrl(cover)
-            # log.debug(u'mi.ozon_cover_url: -----> %s' % mi.ozon_cover_url)
-
-        pub_year = None
-        pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
-        year_pattern = re.compile(r'\d{4}')
-        if pub_year_block:
-            pub_year = re.search(year_pattern, pub_year_block[0])
-            if pub_year:
-                mi.pubdate = toPubdate(log, pub_year.group())
-                # log.debug('pubdate %s' % mi.pubdate)
-
-        mi.rating = self.get_rating(log, entry)
-        # if not mi.rating:
-        #     log.debug('No rating found. ozon_id:%s'%ozon_id)
-
-        return mi
-
-    # }}}
-
-    def get_rating(self, log, entry):  # {{{
-        # log.debug(entry)
-        ozon_rating = None
-        try:
-            xp_rating_template = u'boolean(.//div[contains(@class, "bStars") and contains(@class, "%s")])'
-            rating = None
-            if entry.xpath(xp_rating_template % 'm5'):
-                rating = 5.
-            elif entry.xpath(xp_rating_template % 'm4'):
-                rating = 4.
-            elif entry.xpath(xp_rating_template % 'm3'):
-                rating = 3.
-            elif entry.xpath(xp_rating_template % 'm2'):
-                rating = 2.
-            elif entry.xpath(xp_rating_template % 'm1'):
-                rating = 1.
-            if rating:
-                # 'rating', A floating point number between 0 and 10
-                # OZON raion N of 5, calibre of 10, but there is a bug? in identify
-                ozon_rating = float(rating)
-        except:
-            pass
-        return ozon_rating
-
-    # }}}
-
-    def get_cached_cover_url(self, identifiers):  # {{{
-        url = None
-        ozon_id = identifiers.get('ozon', None)
-        if ozon_id is None:
-            isbn = identifiers.get('isbn', None)
-            if isbn is not None:
-                ozon_id = self.cached_isbn_to_identifier(isbn)
-        if ozon_id is not None:
-            url = self.cached_identifier_to_cover_url(ozon_id)
-        return url
-
-    # }}}
-
-    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30,
-                       get_best_cover=False):  # {{{
-
-        cached_url = self.get_cached_cover_url(identifiers)
-        if cached_url is None:
-            log.debug('No cached cover found, running identify')
-            rq = Queue()
-            self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers)
-            if abort.is_set():
-                return
-            results = []
-            while True:
-                try:
-                    results.append(rq.get_nowait())
-                except Empty:
-                    break
-            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
-            for mi in results:
-                cached_url = self.get_cached_cover_url(mi.identifiers)
-                if cached_url is not None:
-                    break
-
-        if cached_url is None:
-            log.info('No cover found')
-            return
-
-        if abort.is_set():
-            return
-
-        log.debug('Downloading cover from:', cached_url)
-        try:
-            cdata = self.browser.open_novisit(cached_url, timeout=timeout).read()
-            if cdata:
-                result_queue.put((self, cdata))
-        except Exception as e:
-            log.exception(u'Failed to download cover from: %s' % cached_url)
-            return as_unicode(e)
-
-    # }}}
-
-    def get_book_details(self, log, metadata, timeout, cachedPage):  # {{{
-        from lxml import etree, html
-        from calibre.ebooks.chardet import xml_to_unicode
-
-        if not cachedPage:
-            url = self.get_book_url(metadata.get_identifiers())[2]
-            # log.debug(u'book_details_url', url)
-
-            raw = self.browser.open_novisit(url, timeout=timeout).read()
-            fulldoc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
-        else:
-            fulldoc = cachedPage
-            log.debug(u'book_details -> using cached page')
-
-        fullString = etree.tostring(fulldoc)
-        doc = fulldoc.xpath(u'//div[@class="bDetailPage"][1]')[0]
-
-        # series Серия/Серии
-        series_elem = doc.xpath(u'//div[contains(text(), "Сери")]')
-        if series_elem:
-            series_text_elem = series_elem[0].getnext()
-            metadata.series = series_text_elem.xpath(u'.//a/text()')[0]
-            log.debug(u'**Seria: ', metadata.series)
-
-        isbn = None
-        isbn_elem = doc.xpath(u'//div[contains(text(), "ISBN")]')
-        if isbn_elem:
-            isbn = isbn_elem[0].getnext().xpath(u'normalize-space(./text())')
-            metadata.identifiers['isbn'] = isbn
-
-        # get authors/editors if no authors are available
-        authors_joined = ','.join(metadata.authors)
-
-        if authors_joined == '' or authors_joined == "Unknown":
-            authors_from_detail = []
-            editor_elem = doc.xpath(u'//div[contains(text(), "Редактор")]')
-            if editor_elem:
-                editor = editor_elem[0].getnext().xpath(u'.//a/text()')[0]
-                authors_from_detail.append(editor + u' (ред.)')
-            authors_elem = doc.xpath(u'//div[contains(text(), "Автор")]')
-            if authors_elem:
-                authors = authors_elem[0].getnext().xpath(u'.//a/text()')  # list
-                authors_from_detail.extend(authors)
-            if len(authors_from_detail) > 0:
-                metadata.authors = authors_from_detail
-
-        cover = doc.xpath('.//img[contains(@class, "fullImage")]/@src')[0]
-        metadata.ozon_cover_url = _translateToBigCoverUrl(cover)
-
-        publishers = None
-        publishers_elem = doc.xpath(u'//div[contains(text(), "Издатель")]')
-        if publishers_elem:
-            publishers_elem = publishers_elem[0].getnext()
-            publishers = publishers_elem.xpath(u'.//a/text()')[0]
-
-        if publishers:
-            metadata.publisher = publishers
-
-        displ_lang = None
-        langs = None
-        langs_elem = doc.xpath(u'//div[contains(text(), "зык")]')
-        if langs_elem:
-            langs_elem = langs_elem[0].getnext()
-            langs = langs_elem.xpath(u'text()')[0].strip() if langs_elem else None
-        if langs:
-            lng_splt = langs.split(u',')
-            if lng_splt:
-                displ_lang = lng_splt[0].strip()
-                # log.debug(u'displ_lang1: ', displ_lang)
-        metadata.language = _translageLanguageToCode(displ_lang)
-        # log.debug(u'Language: ', metadata.language)
-
-        # can be set before from xml search response
-        if not metadata.pubdate:
-            pubdate_elem = doc.xpath(u'//div[contains(text(), "Год выпуска")]')
-            if pubdate_elem:
-                pubYear = pubdate_elem[0].getnext().xpath(u'text()')[0].strip()
-                if pubYear:
-                    matcher = re.search(r'\d{4}', pubYear)
-                    if matcher:
-                        metadata.pubdate = toPubdate(log, matcher.group(0))
-                        # log.debug(u'Pubdate: ', metadata.pubdate)
-
-        # comments, from Javascript data
-        beginning = fullString.find(u'FirstBlock')
-        end = fullString.find(u'}', beginning)
-        comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape")
-        metadata.comments = replace_entities(comments, 'utf-8')
-    # }}}
-
-
-def _verifyISBNIntegrity(log, isbn):  # {{{
-    # Online ISBN-Check http://www.isbn-check.de/
-    res = check_isbn(isbn)
-    if not res:
-        log.error(u'ISBN integrity check failed for "%s"' % isbn)
-    return res is not None
-
-
-# }}}
-
-# TODO: make customizable
-def _translateToBigCoverUrl(coverUrl):  # {{{
-    # //static.ozone.ru/multimedia/c200/1005748980.jpg
-    # http://www.ozon.ru/multimedia/books_covers/1009493080.jpg
-    m = re.match(r'.+\/([^\.\\]+).+$', coverUrl)
-    if m:
-        coverUrl = 'https://www.ozon.ru/multimedia/books_covers/' + m.group(1) + '.jpg'
-    return coverUrl
-
-
-# }}}
-
-def _get_affiliateId():  # {{{
-    import random
-
-    aff_id = 'romuk'
-    # Use Kovid's affiliate id 30% of the time.
-    if random.randint(1, 10) in (1, 2, 3):
-        aff_id = 'kovidgoyal'
-    return aff_id
-
-
-# }}}
-
-def _format_isbn(log, isbn):  # {{{
-    # for now only RUS ISBN are supported
-    # http://ru.wikipedia.org/wiki/ISBN_российских_издательств
-    isbn_pat = re.compile(r"""
-        ^
-        (\d{3})?            # match GS1 Prefix for ISBN13
-        (5)                 # group identifier for Russian-speaking countries
-        (                   # begin variable length for Publisher
-            [01]\d{1}|      # 2x
-            [2-6]\d{2}|     # 3x
-            7\d{3}|         # 4x (starting with 7)
-            8[0-4]\d{2}|    # 4x (starting with 8)
-            9[2567]\d{2}|   # 4x (starting with 9)
-            99[26]\d{1}|    # 4x (starting with 99)
-            8[5-9]\d{3}|    # 5x (starting with 8)
-            9[348]\d{3}|    # 5x (starting with 9)
-            900\d{2}|       # 5x (starting with 900)
-            91[0-8]\d{2}|   # 5x (starting with 91)
-            90[1-9]\d{3}|   # 6x (starting with 90)
-            919\d{3}|       # 6x (starting with 919)
-            99[^26]\d{4}    # 7x (starting with 99)
-        )                   # end variable length for Publisher
-        (\d+)               # Title
-        ([\dX])             # Check digit
-        $
-    """, re.VERBOSE)
-
-    res = check_isbn(isbn)
-    if res:
-        m = isbn_pat.match(res)
-        if m:
-            res = '-'.join([g for g in m.groups() if g])
-        else:
-            log.error('cannot format ISBN %s. Fow now only russian ISBNs are supported' % isbn)
-    return res
-
-# }}}
-
-
-def _translageLanguageToCode(displayLang):  # {{{
-    displayLang = type(u'')(displayLang).strip() if displayLang else None
-    langTbl = {None: 'ru',
-               u'Русский': 'ru',
-               u'Немецкий': 'de',
-               u'Английский': 'en',
-               u'Французский': 'fr',
-               u'Итальянский': 'it',
-               u'Испанский': 'es',
-               u'Китайский': 'zh',
-               u'Японский': 'ja',
-               u'Финский': 'fi',
-               u'Польский': 'pl',
-               u'Украинский': 'uk',}
-    return langTbl.get(displayLang, None)
-
-
-# }}}
-
-# [В.П. Колесников | Колесников В.П.]-> В. П. Колесников
-def _normalizeAuthorNameWithInitials(name):  # {{{
-    res = name
-    if name:
-        re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
-        re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
-        matcher = re.match(re1, type(u'')(name), re.UNICODE)
-        if not matcher:
-            matcher = re.match(re2, type(u'')(name), re.UNICODE)
-
-        if matcher:
-            d = matcher.groupdict()
-            res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
-    return res
-
-
-# }}}
-
-def toPubdate(log, yearAsString):  # {{{
-    from calibre.utils.date import parse_only_date
-    res = None
-    if yearAsString:
-        try:
-            res = parse_only_date(u"01.01." + yearAsString)
-        except:
-            log.error('cannot parse to date %s' % yearAsString)
-    return res
-
-
-# }}}
-
-def _listToUnicodePrintStr(lst):  # {{{
-    return u'[' + u', '.join(type(u'')(x) for x in lst) + u']'
-
-
-# }}}
-
-if __name__ == '__main__':  # tests {{{
-    # To run these test use: calibre-debug src/calibre/ebooks/metadata/sources/ozon.py
-    # comment some touched_fields before run thoses tests
-    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
-                                                      title_test, authors_test, isbn_test)
-
-    test_identify_plugin(Ozon.name, [
-        # (
-        #     {'identifiers':{}, 'title':u'Норвежский язык: Практический курс',
-        #      'authors':[u'Колесников В.П.', u'Г.В. Шатков']},
-        #     [title_test(u'Норвежский язык: Практический курс', exact=True),
-        #      authors_test([u'В. П. Колесников', u'Г. В. Шатков'])]
-        # ),
-        (
-            {'identifiers': {'isbn': '9785916572629'}},
-            [title_test(u'На все четыре стороны', exact=True),
-             authors_test([u'А. А. Гилл'])]
-        ),
-        (
-            {'identifiers': {}, 'title': u'Der Himmel Kennt Keine Gunstlinge',
-             'authors': [u'Erich Maria Remarque']},
-            [title_test(u'Der Himmel Kennt Keine Gunstlinge', exact=True),
-             authors_test([u'Erich Maria Remarque'])]
-        ),
-        (
-            {'identifiers': {}, 'title': u'Метро 2033',
-             'authors': [u'Дмитрий Глуховский']},
-            [title_test(u'Метро 2033', exact=False)]
-        ),
-        (
-            {'identifiers': {'isbn': '9785170727209'}, 'title': u'Метро 2033',
-             'authors': [u'Дмитрий Глуховский']},
-            [title_test(u'Метро 2033', exact=True),
-             authors_test([u'Дмитрий Глуховский']),
-             isbn_test('9785170727209')]
-        ),
-        (
-            {'identifiers': {'isbn': '5-699-13613-4'}, 'title': u'Метро 2033',
-             'authors': [u'Дмитрий Глуховский']},
-            [title_test(u'Метро 2033', exact=True),
-             authors_test([u'Дмитрий Глуховский'])]
-        ),
-        (
-            {'identifiers': {}, 'title': u'Метро',
-             'authors': [u'Глуховский']},
-            [title_test(u'Метро', exact=False)]
-        ),
-])
-# }}}
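
A note on the Python 3 fixes referenced in the commit message: in Python 3
the HTMLParser module was renamed to html.parser, and urllib.quote_plus
moved to urllib.parse.quote_plus; the deleted ozon.py above still imports
the Python 2 locations. A minimal sketch of the compatibility imports the
plugin would have needed, following the same try/except pattern the deleted
files already use for Queue (illustrative only, not part of the patch):

    try:
        # Python 3 locations
        from html.parser import HTMLParser
        from html import unescape
        from urllib.parse import quote_plus
    except ImportError:
        # Python 2 fallbacks
        from HTMLParser import HTMLParser
        from urllib import quote_plus
        unescape = HTMLParser().unescape

    # usage: unescape('&amp;') returns '&'

On Python 3.9+ the HTMLParser.unescape() method that ozon.py calls via
h.unescape(...) was removed outright, so html.unescape() is the only
forward-compatible spelling.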