mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Update ozon.ru metadata download plugin to fix searching for books by ISBN
This commit is contained in:
parent
53c5ff5daa
commit
3b644906a4
@ -84,13 +84,15 @@ class Ozon(Source):
|
|||||||
# Added Russian variant of 'Unknown'
|
# Added Russian variant of 'Unknown'
|
||||||
unk = [_('Unknown').upper(), 'Неизв.'.upper(), icu_upper('Неизв.')]
|
unk = [_('Unknown').upper(), 'Неизв.'.upper(), icu_upper('Неизв.')]
|
||||||
|
|
||||||
if title and title not in unk:
|
# use only ozonid if specified otherwise ozon.ru does not like a combination
|
||||||
qItems.add(title)
|
if not ozonid:
|
||||||
|
if title and title not in unk:
|
||||||
|
qItems.add(title)
|
||||||
|
|
||||||
if authors:
|
if authors:
|
||||||
for auth in authors:
|
for auth in authors:
|
||||||
if icu_upper(auth) not in unk:
|
if icu_upper(auth) not in unk:
|
||||||
qItems.add(auth)
|
qItems.add(auth)
|
||||||
|
|
||||||
qItems.discard(None)
|
qItems.discard(None)
|
||||||
qItems.discard('')
|
qItems.discard('')
|
||||||
@ -102,7 +104,7 @@ class Ozon(Source):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
search_url += quote_plus(searchText)
|
search_url += quote_plus(searchText)
|
||||||
log.debug(u'search url: %r' % search_url)
|
log.debug(u'search url: %s' % search_url)
|
||||||
return search_url
|
return search_url
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
@ -112,6 +114,7 @@ class Ozon(Source):
|
|||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
|
import json
|
||||||
|
|
||||||
if not self.is_configured():
|
if not self.is_configured():
|
||||||
return
|
return
|
||||||
@ -131,8 +134,11 @@ class Ozon(Source):
|
|||||||
doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
|
doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
|
||||||
entries_block = doc.xpath(u'//div[@class="bSearchResult"]')
|
entries_block = doc.xpath(u'//div[@class="bSearchResult"]')
|
||||||
|
|
||||||
|
# log.debug(u'HTML: %s' % xml_to_unicode(raw, verbose=True)[0])
|
||||||
|
|
||||||
if entries_block:
|
if entries_block:
|
||||||
entries = doc.xpath(u'//div[contains(@itemprop, "itemListElement")]')
|
entries = doc.xpath(u'//div[contains(@itemprop, "itemListElement")]')
|
||||||
|
# log.debug(u'entries_block')
|
||||||
# for entry in entries:
|
# for entry in entries:
|
||||||
# log.debug('entries %s' % entree.tostring(entry))
|
# log.debug('entries %s' % entree.tostring(entry))
|
||||||
metadata = self.get_metadata(log, entries, title, authors, identifiers)
|
metadata = self.get_metadata(log, entries, title, authors, identifiers)
|
||||||
@ -140,19 +146,30 @@ class Ozon(Source):
|
|||||||
else:
|
else:
|
||||||
# Redirect page: trying to extract ozon_id from javascript data
|
# Redirect page: trying to extract ozon_id from javascript data
|
||||||
h = HTMLParser()
|
h = HTMLParser()
|
||||||
entry_string = (h.unescape(unicode(etree.tostring(doc, pretty_print=True))))
|
entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
|
||||||
id_title_pat = re.compile(u'products":\[{"id":(\d{7}),"name":"([а-яА-Я :\-0-9]+)')
|
json_pat = re.compile(u'dataLayer\s*=\s*(.+)?;')
|
||||||
# result containing ozon_id and entry_title
|
json_info = re.search(json_pat, entry_string)
|
||||||
entry_info = re.search(id_title_pat, entry_string)
|
jsondata = json_info.group(1) if json_info else None
|
||||||
ozon_id = entry_info.group(1) if entry_info else None
|
|
||||||
entry_title = entry_info.group(2) if entry_info else None
|
|
||||||
|
|
||||||
if ozon_id:
|
# log.debug(u'jsondata: %s' % jsondata)
|
||||||
metadata = self.to_metadata_for_single_entry(log, ozon_id, entry_title, authors)
|
dataLayer = json.loads(jsondata) if jsondata else None
|
||||||
identifiers['ozon'] = ozon_id
|
|
||||||
self.get_all_details(log, [metadata], abort, result_queue, identifiers, timeout, cachedPagesDict={})
|
ozon_id = None
|
||||||
else:
|
if dataLayer and dataLayer[0] and 'ecommerce' in dataLayer[0]:
|
||||||
log.error('No SearchResults in Ozon.ru response found')
|
jsproduct = dataLayer[0]['ecommerce']['detail']['products'][0]
|
||||||
|
ozon_id = as_unicode(jsproduct['id'])
|
||||||
|
entry_title = as_unicode(jsproduct['name'])
|
||||||
|
|
||||||
|
log.debug(u'ozon_id %s' % ozon_id)
|
||||||
|
log.debug(u'entry_title %s' % entry_title)
|
||||||
|
|
||||||
|
if ozon_id:
|
||||||
|
metadata = self.to_metadata_for_single_entry(log, ozon_id, entry_title, authors)
|
||||||
|
identifiers['ozon'] = ozon_id
|
||||||
|
self.get_all_details(log, [metadata], abort, result_queue, identifiers, timeout, cachedPagesDict={})
|
||||||
|
|
||||||
|
if not ozon_id:
|
||||||
|
log.error('No SearchResults in Ozon.ru response found!')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception('Failed to parse identify results')
|
log.exception('Failed to parse identify results')
|
||||||
@ -478,7 +495,7 @@ class Ozon(Source):
|
|||||||
langs_elem = doc.xpath(u'//div[contains(text(), "зык")]')
|
langs_elem = doc.xpath(u'//div[contains(text(), "зык")]')
|
||||||
if langs_elem:
|
if langs_elem:
|
||||||
langs_elem = langs_elem[0].getnext()
|
langs_elem = langs_elem[0].getnext()
|
||||||
langs = langs_elem.xpath(u'text()')[0].strip()
|
langs = langs_elem.xpath(u'text()')[0].strip() if langs_elem else None
|
||||||
if langs:
|
if langs:
|
||||||
lng_splt = langs.split(u',')
|
lng_splt = langs.split(u',')
|
||||||
if lng_splt:
|
if lng_splt:
|
||||||
@ -576,9 +593,9 @@ def _format_isbn(log, isbn): # {{{
|
|||||||
log.error('cannot format ISBN %s. Fow now only russian ISBNs are supported' % isbn)
|
log.error('cannot format ISBN %s. Fow now only russian ISBNs are supported' % isbn)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
def _translageLanguageToCode(displayLang): # {{{
|
def _translageLanguageToCode(displayLang): # {{{
|
||||||
displayLang = unicode(displayLang).strip() if displayLang else None
|
displayLang = unicode(displayLang).strip() if displayLang else None
|
||||||
langTbl = {None: 'ru',
|
langTbl = {None: 'ru',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user