mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
...
This commit is contained in:
parent
65a2931f68
commit
9076fe4a13
@ -1,6 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
from xml.etree.ElementTree import _Element
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
|
||||
@ -12,10 +11,8 @@ import datetime
|
||||
from urllib import quote_plus
|
||||
from Queue import Queue, Empty
|
||||
from lxml import etree, html
|
||||
from lxml.etree import ElementBase
|
||||
from calibre import as_unicode
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
@ -27,16 +24,16 @@ class Ozon(Source):
|
||||
description = _('Downloads metadata and covers from OZON.ru')
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
|
||||
|
||||
touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
|
||||
'publisher', 'pubdate', 'comments', 'series', 'rating', 'language'])
|
||||
# For testing only: the test function does not like it when some fields are sometimes empty
|
||||
#touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
|
||||
# 'publisher', 'pubdate', 'comments'])
|
||||
# 'publisher', 'pubdate', 'comments'])
|
||||
|
||||
supports_gzip_transfer_encoding = True
|
||||
has_html_comments = True
|
||||
|
||||
|
||||
ozon_url = 'http://www.ozon.ru'
|
||||
|
||||
# match any ISBN10/13. From "Regular Expressions Cookbook"
|
||||
@ -53,11 +50,11 @@ class Ozon(Source):
|
||||
res = ('ozon', ozon_id, url)
|
||||
return res
|
||||
# }}}
|
||||
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
# div_book -> search only books, ebooks and audio books
|
||||
search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
|
||||
|
||||
|
||||
isbn = _format_isbn(log, identifiers.get('isbn', None))
|
||||
# TODO: format isbn!
|
||||
qItems = set([isbn, title])
|
||||
@ -66,7 +63,7 @@ class Ozon(Source):
|
||||
qItems.discard(None)
|
||||
qItems.discard('')
|
||||
qItems = map(_quoteString, qItems)
|
||||
|
||||
|
||||
q = ' '.join(qItems).strip()
|
||||
log.info(u'search string: ' + q)
|
||||
|
||||
@ -74,10 +71,10 @@ class Ozon(Source):
|
||||
q = q.encode('utf-8')
|
||||
if not q:
|
||||
return None
|
||||
|
||||
|
||||
search_url += quote_plus(q)
|
||||
log.debug(u'search url: %r'%search_url)
|
||||
|
||||
|
||||
return search_url
|
||||
# }}}
|
||||
|
||||
@ -93,11 +90,11 @@ class Ozon(Source):
|
||||
|
||||
try:
|
||||
raw = self.browser.open_novisit(query).read()
|
||||
|
||||
|
||||
except Exception as e:
|
||||
log.exception(u'Failed to make identify query: %r'%query)
|
||||
return as_unicode(e)
|
||||
|
||||
|
||||
try:
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
|
||||
@ -110,14 +107,14 @@ class Ozon(Source):
|
||||
return as_unicode(e)
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
def get_metadata(self, log, entries, title, authors, identifiers): # {{{
|
||||
title = unicode(title).upper() if title else ''
|
||||
authors = map(unicode.upper, map(unicode, authors)) if authors else None
|
||||
ozon_id = identifiers.get('ozon', None)
|
||||
|
||||
|
||||
unk = unicode(_('Unknown')).upper()
|
||||
|
||||
|
||||
if title == unk:
|
||||
title = None
|
||||
|
||||
@ -129,7 +126,7 @@ class Ozon(Source):
|
||||
for miauthor in miauthors:
|
||||
if author in miauthor: return True
|
||||
return None
|
||||
|
||||
|
||||
def ensure_metadata_match(mi): # {{{
|
||||
match = True
|
||||
if title:
|
||||
@ -138,13 +135,13 @@ class Ozon(Source):
|
||||
if match and authors:
|
||||
miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
|
||||
match = in_authors(authors, miauthors)
|
||||
|
||||
|
||||
if match and ozon_id:
|
||||
mozon_id = mi.identifiers['ozon']
|
||||
match = ozon_id == mozon_id
|
||||
|
||||
return match
|
||||
|
||||
|
||||
return match
|
||||
|
||||
metadata = []
|
||||
for i, entry in enumerate(entries):
|
||||
mi = self.to_metadata(log, entry)
|
||||
@ -159,64 +156,64 @@ class Ozon(Source):
|
||||
|
||||
def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout): # {{{
    """Fetch details for every candidate and queue the acceptable ones.

    For each item of *metadata* the book page is queried through
    ``self.get_book_details``; a failure there is logged and the item is
    still processed with whatever data it already carries.  When an ISBN
    was requested and the item lists ISBNs that do not contain it, the
    item is skipped.  Surviving items have their ISBNs and cover URL
    cached, are cleaned with ``self.clean_downloaded_metadata`` and are
    put on *result_queue*.

    :param log: logger providing ``debug`` and ``exception``
    :param metadata: iterable of metadata objects carrying an ``'ozon'``
        entry in ``identifiers`` and an ``ozon_cover_url`` attribute
    :param abort: event-like object; processing stops once it is set
    :param result_queue: queue receiving the accepted metadata objects
    :param identifiers: dict of requested identifiers (``'isbn'`` is read)
    :param timeout: timeout forwarded to ``get_book_details``
    """
    req_isbn = identifiers.get('isbn', None)

    for mi in metadata:
        if abort.is_set():
            break
        try:
            ozon_id = mi.identifiers['ozon']

            # Best effort: a failed detail download must not discard the
            # search result itself.  ``except Exception`` (rather than a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            try:
                self.get_book_details(log, mi, timeout)
            except Exception:
                log.exception(u'Failed to get details for metadata: %s'%mi.title)

            all_isbns = getattr(mi, 'all_isbns', [])
            # NOTE(review): check_isbn presumably returns the normalized
            # ISBN (or a false value) -- confirm against calibre.ebooks.metadata
            if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
                log.debug(u'skipped, no requested ISBN %s found'%req_isbn)
                continue

            for isbn in all_isbns:
                self.cache_isbn_to_identifier(isbn, ozon_id)

            if mi.ozon_cover_url:
                self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)

            self.clean_downloaded_metadata(mi)
            result_queue.put(mi)
        except Exception:
            # One broken entry must not abort the remaining ones.
            log.exception(u'Failed to get details for metadata: %s'%mi.title)
# }}}
|
||||
|
||||
|
||||
def to_metadata(self, log, entry): # {{{
    """Build a ``Metadata`` object from one entry of the OZON search feed.

    The child elements ``Name``, ``Author``, ``ID``, ``Annotation``,
    ``Picture`` and ``ClientRatingValue`` of *entry* are read (matched by
    local name, so XML namespaces are ignored) and whitespace-normalized.

    :param log: logger (not used here)
    :param entry: lxml element representing one search result
    :return: ``Metadata`` instance with ``identifiers['ozon']``,
        ``comments``, ``ozon_cover_url`` (``None`` when the feed has no
        picture) and, when parsable, ``rating`` set
    """
    xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

    title = entry.xpath(xp_template.format('Name'))
    author = entry.xpath(xp_template.format('Author'))
    # Authors arrive as one comma separated string; strip each name so
    # "A, B" does not yield an author called " B".
    mi = Metadata(title, [name.strip() for name in author.split(',')])

    ozon_id = entry.xpath(xp_template.format('ID'))
    mi.identifiers = {'ozon':ozon_id}

    mi.comments = entry.xpath(xp_template.format('Annotation'))

    mi.ozon_cover_url = None
    cover = entry.xpath(xp_template.format('Picture'))
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    rating = entry.xpath(xp_template.format('ClientRatingValue'))
    if rating:
        try:
            # 'rating' is a floating point number between 0 and 10.
            # OZON rates N of 5, calibre of 10; the value is stored
            # unscaled (original author's note: possible bug in identify).
            mi.rating = float(rating)
        except (TypeError, ValueError):
            # Unparsable rating text: leave the rating unset.
            pass
    return mi
# }}}
|
||||
|
||||
|
||||
def get_cached_cover_url(self, identifiers): # {{{
|
||||
url = None
|
||||
ozon_id = identifiers.get('ozon', None)
|
||||
@ -248,14 +245,14 @@ class Ozon(Source):
|
||||
cached_url = self.get_cached_cover_url(mi.identifiers)
|
||||
if cached_url is not None:
|
||||
break
|
||||
|
||||
|
||||
if cached_url is None:
|
||||
log.info('No cover found')
|
||||
return
|
||||
|
||||
if abort.is_set():
|
||||
return
|
||||
|
||||
|
||||
log.debug('Downloading cover from:', cached_url)
|
||||
try:
|
||||
cdata = self.browser.open_novisit(cached_url, timeout=timeout).read()
|
||||
@ -265,10 +262,10 @@ class Ozon(Source):
|
||||
log.exception(u'Failed to download cover from: %s'%cached_url)
|
||||
return as_unicode(e)
|
||||
# }}}
|
||||
|
||||
|
||||
def get_book_details(self, log, metadata, timeout): # {{{
|
||||
url = self.get_book_url(metadata.get_identifiers())[2]
|
||||
|
||||
|
||||
raw = self.browser.open_novisit(url, timeout=timeout).read()
|
||||
doc = html.fromstring(raw)
|
||||
|
||||
@ -298,14 +295,14 @@ class Ozon(Source):
|
||||
if matcher:
|
||||
year = int(matcher.group(0))
|
||||
# only year is available, so use 1-st of Jan
|
||||
metadata.pubdate = datetime.datetime(year, 1, 1) #<- failed comparation in identify.py
|
||||
metadata.pubdate = datetime.datetime(year, 1, 1) #<- failed comparation in identify.py
|
||||
#metadata.pubdate = datetime(year, 1, 1)
|
||||
xpt = u'substring-after(string(../text()[contains(., "Язык")]), ": ")'
|
||||
displLang = publishers[0].xpath(xpt)
|
||||
lang_code =_translageLanguageToCode(displLang)
|
||||
if lang_code:
|
||||
metadata.language = lang_code
|
||||
|
||||
|
||||
# overwrite comments from HTML if any
|
||||
# tr/td[contains(.//text(), "От издателя")] -> does not work, why?
|
||||
xpt = u'//div[contains(@class, "detail")]//tr/td//text()[contains(., "От издателя")]'\
|
||||
@ -323,14 +320,14 @@ class Ozon(Source):
|
||||
# }}}
|
||||
|
||||
def _quoteString(str): # {{{
|
||||
return '"' + str + '"' if str and str.find(' ') != -1 else str
|
||||
return '"' + str + '"' if str and str.find(' ') != -1 else str
|
||||
# }}}
|
||||
|
||||
# TODO: make customizable
|
||||
def _translateToBigCoverUrl(coverUrl): # {{{
|
||||
# http://www.ozon.ru/multimedia/books_covers/small/1002986468.gif
|
||||
# http://www.ozon.ru/multimedia/books_covers/1002986468.jpg
|
||||
|
||||
|
||||
m = re.match(r'^(.+\/)small\/(.+\.).+$', coverUrl)
|
||||
if m:
|
||||
coverUrl = m.group(1) + m.group(2) + 'jpg'
|
||||
@ -339,12 +336,12 @@ def _translateToBigCoverUrl(coverUrl): # {{{
|
||||
|
||||
def _get_affiliateId(): # {{{
|
||||
import random
|
||||
|
||||
|
||||
aff_id = 'romuk'
|
||||
# Use Kovid's affiliate id 30% of the time.
|
||||
if random.randint(1, 10) in (1, 2, 3):
|
||||
aff_id = 'kovidgoyal'
|
||||
return aff_id
|
||||
return aff_id
|
||||
# }}}
|
||||
|
||||
# For now only Russian ISBNs are supported
|
||||
@ -387,10 +384,10 @@ def _format_isbn(log, isbn): # {{{
|
||||
def _translageLanguageToCode(displayLang): # {{{
|
||||
displayLang = unicode(displayLang).strip() if displayLang else None
|
||||
langTbl = { None: 'ru',
|
||||
u'Немецкий': 'de',
|
||||
u'Английский': 'en',
|
||||
u'Немецкий': 'de',
|
||||
u'Английский': 'en',
|
||||
u'Французский': 'fr',
|
||||
u'Итальянский': 'it',
|
||||
u'Итальянский': 'it',
|
||||
u'Испанский': 'es',
|
||||
u'Китайский': 'zh',
|
||||
u'Японский': 'ja' }
|
||||
@ -406,7 +403,7 @@ if __name__ == '__main__': # tests {{{
|
||||
|
||||
test_identify_plugin(Ozon.name,
|
||||
[
|
||||
|
||||
|
||||
(
|
||||
{'identifiers':{'isbn': '9785916572629'} },
|
||||
[title_test(u'На все четыре стороны', exact=True),
|
||||
@ -442,4 +439,4 @@ if __name__ == '__main__': # tests {{{
|
||||
[title_test(u'Метро', exact=False)]
|
||||
),
|
||||
])
|
||||
# }}}
|
||||
# }}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user