mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get Books: Add XinXii store. Metadata download plugin for ozon.ru, enabled only when user selects russian as their language in the welcome wizard.
This commit is contained in:
commit
65a2931f68
@ -590,8 +590,9 @@ from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
|||||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||||
from calibre.ebooks.metadata.sources.douban import Douban
|
from calibre.ebooks.metadata.sources.douban import Douban
|
||||||
|
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||||
|
|
||||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
|
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -1476,6 +1477,14 @@ class StoreWoblinkStore(StoreBase):
|
|||||||
headquarters = 'PL'
|
headquarters = 'PL'
|
||||||
formats = ['EPUB']
|
formats = ['EPUB']
|
||||||
|
|
||||||
|
class XinXiiStore(StoreBase):
    # Registration entry for the XinXii store in the builtin plugin list.
    # The working implementation is the class named by ``actual_plugin``
    # ("module:class"); this stub only carries descriptive attributes.
    name = 'XinXii'
    # NOTE(review): left empty in this commit - presumably shown to the user
    # next to the store name; confirm against StoreBase and fill in.
    description = ''
    actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore'

    # Two-letter country code of the store's headquarters.
    headquarters = 'DE'
    # E-book formats the store is advertised as offering.
    formats = ['EPUB', 'PDF']
|
||||||
|
|
||||||
class StoreZixoStore(StoreBase):
|
class StoreZixoStore(StoreBase):
|
||||||
name = 'Zixo'
|
name = 'Zixo'
|
||||||
author = u'Tomasz Długosz'
|
author = u'Tomasz Długosz'
|
||||||
@ -1524,6 +1533,7 @@ plugins += [
|
|||||||
StoreWHSmithUKStore,
|
StoreWHSmithUKStore,
|
||||||
StoreWizardsTowerBooksStore,
|
StoreWizardsTowerBooksStore,
|
||||||
StoreWoblinkStore,
|
StoreWoblinkStore,
|
||||||
|
XinXiiStore,
|
||||||
StoreZixoStore
|
StoreZixoStore
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
|
|||||||
config['enabled_plugins'] = ep
|
config['enabled_plugins'] = ep
|
||||||
|
|
||||||
default_disabled_plugins = set([
|
default_disabled_plugins = set([
|
||||||
'Overdrive', 'Douban Books',
|
'Overdrive', 'Douban Books', 'OZON.ru',
|
||||||
])
|
])
|
||||||
|
|
||||||
def is_disabled(plugin):
|
def is_disabled(plugin):
|
||||||
|
@ -11,7 +11,7 @@ from functools import partial
|
|||||||
from base64 import b64decode
|
from base64 import b64decode
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
from calibre import guess_all_extensions, prints, force_unicode
|
from calibre import guess_type, guess_all_extensions, prints, force_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
|
||||||
@ -147,6 +147,12 @@ def _parse_cover_data(root, imgid, mi):
|
|||||||
if elm_binary:
|
if elm_binary:
|
||||||
mimetype = elm_binary[0].get('content-type', 'image/jpeg')
|
mimetype = elm_binary[0].get('content-type', 'image/jpeg')
|
||||||
mime_extensions = guess_all_extensions(mimetype)
|
mime_extensions = guess_all_extensions(mimetype)
|
||||||
|
|
||||||
|
if not mime_extensions and mimetype.startswith('image/'):
|
||||||
|
mimetype_fromid = guess_type(imgid)[0]
|
||||||
|
if mimetype_fromid and mimetype_fromid.startswith('image/'):
|
||||||
|
mime_extensions = guess_all_extensions(mimetype_fromid)
|
||||||
|
|
||||||
if mime_extensions:
|
if mime_extensions:
|
||||||
pic_data = elm_binary[0].text
|
pic_data = elm_binary[0].text
|
||||||
if pic_data:
|
if pic_data:
|
||||||
|
445
src/calibre/ebooks/metadata/sources/ozon.py
Normal file
445
src/calibre/ebooks/metadata/sources/ozon.py
Normal file
@ -0,0 +1,445 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
from xml.etree.ElementTree import _Element
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
import urllib2
|
||||||
|
import datetime
|
||||||
|
from urllib import quote_plus
|
||||||
|
from Queue import Queue, Empty
|
||||||
|
from lxml import etree, html
|
||||||
|
from lxml.etree import ElementBase
|
||||||
|
from calibre import as_unicode
|
||||||
|
|
||||||
|
from calibre import prints
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata import check_isbn
|
||||||
|
from calibre.ebooks.metadata.sources.base import Source
|
||||||
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
|
|
||||||
|
class Ozon(Source):
    '''
    Metadata source that identifies books and downloads covers from OZON.ru.

    Identification runs in two steps: the OZON search web service returns a
    list of candidate items (title, authors, id, annotation, cover, rating),
    then the HTML detail page of every matching candidate is scraped for
    series, ISBNs, publisher, publication year, language and description.
    '''
    name = 'OZON.ru'
    description = _('Downloads metadata and covers from OZON.ru')

    capabilities = frozenset(['identify', 'cover'])

    touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
                               'publisher', 'pubdate', 'comments', 'series', 'rating', 'language'])
    # For testing only: the test function does not like it when some fields
    # are sometimes empty
    #touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
    #                          'publisher', 'pubdate', 'comments'])

    supports_gzip_transfer_encoding = True
    has_html_comments = True

    ozon_url = 'http://www.ozon.ru'

    # match any ISBN10/13. From "Regular Expressions Cookbook"
    isbnPattern = r'(?:ISBN(?:-1[03])?:? )?(?=[-0-9 ]{17}|'\
             '[-0-9X ]{13}|[0-9X]{10})(?:97[89][- ]?)?[0-9]{1,5}[- ]?'\
             '(?:[0-9]+[- ]?){2}[0-9X]'
    isbnRegex = re.compile(isbnPattern)

    def get_book_url(self, identifiers): # {{{
        '''
        Return ``('ozon', ozon_id, url)`` for the book's detail page, or None
        when ``identifiers`` carries no (non-empty) ``ozon`` id. The URL
        includes an affiliate id chosen by ``_get_affiliateId()``.
        '''
        ozon_id = identifiers.get('ozon', None)
        res = None
        if ozon_id:
            url = '{}/context/detail/id/{}?partner={}'.format(self.ozon_url, urllib2.quote(ozon_id), _get_affiliateId())
            res = ('ozon', ozon_id, url)
        return res
    # }}}

    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        '''
        Build the search web service URL from ISBN, title and authors.

        Returns None when none of them yields a usable search term.
        '''
        # div_book -> search only books, ebooks and audio books
        search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='

        isbn = _format_isbn(log, identifiers.get('isbn', None))
        # a set removes duplicate terms; the order of the terms is arbitrary
        qItems = set([isbn, title])
        if authors:
            qItems |= frozenset(authors)
        qItems.discard(None)
        qItems.discard('')
        qItems = [_quoteString(item) for item in qItems]

        q = ' '.join(qItems).strip()
        log.info(u'search string: ' + q)

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None

        search_url += quote_plus(q)
        log.debug(u'search url: %r'%search_url)

        return search_url
    # }}}

    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=30):
        '''
        Search OZON.ru and put a Metadata object for every matching book into
        ``result_queue``.

        Returns None on success (and when the plugin is not configured),
        otherwise a unicode error message.
        '''
        if not self.is_configured():
            return
        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
        if not query:
            err = 'Insufficient metadata to construct query'
            log.error(err)
            return err

        try:
            # honour the caller's timeout for the search request as well
            raw = self.browser.open_novisit(query, timeout=timeout).read()
        except Exception as e:
            log.exception(u'Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
            entries = feed.xpath('//*[local-name() = "SearchItems"]')
            if entries:
                metadata = self.get_metadata(log, entries, title, authors, identifiers)
                self.get_all_details(log, metadata, abort, result_queue, identifiers, timeout)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
    # }}}

    def get_metadata(self, log, entries, title, authors, identifiers): # {{{
        '''
        Convert the search result ``entries`` to Metadata objects and keep
        only those matching the requested title, authors and ozon id.

        Matching is a case-insensitive substring test; a title or author list
        equal to the localized 'Unknown' is treated as not given.
        '''
        title = unicode(title).upper() if title else ''
        authors = [unicode(a).upper() for a in authors] if authors else None
        ozon_id = identifiers.get('ozon', None)

        unk = unicode(_('Unknown')).upper()

        if title == unk:
            title = None

        if authors == [unk]:
            authors = None

        def in_authors(authors, miauthors):
            # True as soon as one requested author is contained in one
            # author of the result, None otherwise
            for author in authors:
                for miauthor in miauthors:
                    if author in miauthor: return True
            return None

        def ensure_metadata_match(mi): # {{{
            match = True
            if title:
                mititle = unicode(mi.title).upper() if mi.title else ''
                match = title in mititle
            if match and authors:
                miauthors = [unicode(a).upper() for a in mi.authors] if mi.authors else []
                match = in_authors(authors, miauthors)

            if match and ozon_id:
                mozon_id = mi.identifiers['ozon']
                match = ozon_id == mozon_id

            return match

        metadata = []
        for i, entry in enumerate(entries):
            mi = self.to_metadata(log, entry)
            # position in the search result is used as the relevance
            mi.source_relevance = i
            if ensure_metadata_match(mi):
                metadata.append(mi)
                # log.debug(u'added metadata %s %s. '%(mi.title, mi.authors))
            else:
                log.debug(u'skipped metadata %s %s. (does not match the query)'%(mi.title, mi.authors))
        return metadata
    # }}}

    def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout): # {{{
        '''
        Complete every Metadata object from its detail page, fill the
        ISBN -> ozon id and ozon id -> cover URL caches and put the result
        into ``result_queue``.

        When an ISBN was requested, results whose detail page lists ISBNs
        that do not include it are skipped. Stops early once ``abort`` is set.
        '''
        req_isbn = identifiers.get('isbn', None)

        for mi in metadata:
            if abort.is_set():
                break
            try:
                ozon_id = mi.identifiers['ozon']

                # best effort: a failing detail page still leaves the data
                # from the search result usable
                try:
                    self.get_book_details(log, mi, timeout)
                except Exception:
                    log.exception(u'Failed to get details for metadata: %s'%mi.title)

                all_isbns = getattr(mi, 'all_isbns', [])
                if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
                    log.debug(u'skipped, no requested ISBN %s found'%req_isbn)
                    continue

                for isbn in all_isbns:
                    self.cache_isbn_to_identifier(isbn, ozon_id)

                if mi.ozon_cover_url:
                    self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)

                self.clean_downloaded_metadata(mi)
                result_queue.put(mi)
            except Exception:
                log.exception(u'Failed to get details for metadata: %s'%mi.title)
    # }}}

    def to_metadata(self, log, entry): # {{{
        '''
        Build a Metadata object from one ``SearchItems`` element of the
        search web service response.
        '''
        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

        title = entry.xpath(xp_template.format('Name'))
        author = entry.xpath(xp_template.format('Author'))
        # authors arrive as one comma separated string; strip the blanks
        # that follow the commas
        mi = Metadata(title, [a.strip() for a in author.split(',')])

        ozon_id = entry.xpath(xp_template.format('ID'))
        mi.identifiers = {'ozon':ozon_id}

        mi.comments = entry.xpath(xp_template.format('Annotation'))

        mi.ozon_cover_url = None
        cover = entry.xpath(xp_template.format('Picture'))
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        rating = entry.xpath(xp_template.format('ClientRatingValue'))
        if rating:
            try:
                # calibre's 'rating' is a floating point number between 0
                # and 10 while OZON rates N out of 5; the value is stored
                # unscaled (the original author suspected a bug in identify)
                mi.rating = float(rating)
            except (TypeError, ValueError):
                log.debug(u'ignored unparsable rating %r'%rating)
        return mi
    # }}}

    def get_cached_cover_url(self, identifiers): # {{{
        '''
        Return the cached cover URL for the ozon id in ``identifiers``,
        falling back to the ozon id cached for the ISBN; None if unknown.
        '''
        url = None
        ozon_id = identifiers.get('ozon', None)
        if ozon_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                ozon_id = self.cached_isbn_to_identifier(isbn)
        if ozon_id is not None:
            url = self.cached_identifier_to_cover_url(ozon_id)
        return url
    # }}}

    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30): # {{{
        '''
        Download the cover and put ``(self, cover_data)`` into
        ``result_queue``.

        Runs ``identify`` first when no cover URL is cached yet. Returns a
        unicode error message if the download fails, otherwise None.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.debug('No cached cover found, running identify')
            rq = Queue()
            # forward the caller's timeout instead of identify's default
            self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
            # take the cover of the best result that has one
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break

        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return

        log.debug('Downloading cover from:', cached_url)
        try:
            cdata = self.browser.open_novisit(cached_url, timeout=timeout).read()
            if cdata:
                result_queue.put((self, cdata))
        except Exception as e:
            log.exception(u'Failed to download cover from: %s'%cached_url)
            return as_unicode(e)
    # }}}

    def get_book_details(self, log, metadata, timeout): # {{{
        '''
        Scrape the book's HTML detail page and update ``metadata`` in place
        with series, ISBNs, publisher, publication date, language and
        comments, as far as they are found.
        '''
        url = self.get_book_url(metadata.get_identifiers())[2]

        raw = self.browser.open_novisit(url, timeout=timeout).read()
        doc = html.fromstring(raw)

        # series
        xpt = u'normalize-space(//div[@class="frame_content"]//div[contains(normalize-space(text()), "Серия:")]//a/@title)'
        series = doc.xpath(xpt)
        if series:
            metadata.series = series

        # ISBNs are listed in the description meta tag after the word "ISBN"
        xpt = u'substring-after(//meta[@name="description"]/@content, "ISBN")'
        isbn_str = doc.xpath(xpt)
        if isbn_str:
            all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)]
            if all_isbns:
                metadata.all_isbns = all_isbns
                metadata.isbn = all_isbns[0]

        xpt = u'//div[@class="frame_content"]//div[contains(normalize-space(text()), "Издатель")]//a[@title="Издательство"]'
        publishers = doc.xpath(xpt)
        if publishers:
            metadata.publisher = publishers[0].text

            # year and language are text nodes next to the publisher link
            xpt = u'string(../text()[contains(., "г.")])'
            yearIn = publishers[0].xpath(xpt)
            if yearIn:
                matcher = re.search(r'\d{4}', yearIn)
                if matcher:
                    year = int(matcher.group(0))
                    # only year is available, so use 1-st of Jan
                    metadata.pubdate = datetime.datetime(year, 1, 1) #<- failed comparison in identify.py
            xpt = u'substring-after(string(../text()[contains(., "Язык")]), ": ")'
            displLang = publishers[0].xpath(xpt)
            lang_code =_translageLanguageToCode(displLang)
            if lang_code:
                metadata.language = lang_code

        # overwrite comments from HTML if any
        # tr/td[contains(.//text(), "От издателя")] -> does not work, why?
        xpt = u'//div[contains(@class, "detail")]//tr/td//text()[contains(., "От издателя")]'\
            u'/ancestor::tr[1]/following-sibling::tr[1]/td[contains(./@class, "description")][1]'
        comment_elem = doc.xpath(xpt)
        if comment_elem:
            comments = unicode(etree.tostring(comment_elem[0]))
            if comments:
                # cleanup root tag, TODO: remove tags like object/embeded
                comments = re.sub(r'^<td.+?>|</td>.+?$', u'', comments).strip()
                if comments:
                    metadata.comments = comments
        else:
            log.debug('No book description found in HTML')
    # }}}
|
||||||
|
|
||||||
|
def _quoteString(str): # {{{
    '''
    Wrap ``str`` in double quotes if it contains a space; empty values and
    strings without a space are returned unchanged.
    '''
    if not str or ' ' not in str:
        return str
    return '"' + str + '"'
# }}}
|
||||||
|
|
||||||
|
# TODO: make customizable
|
||||||
|
def _translateToBigCoverUrl(coverUrl): # {{{
    '''
    Turn the URL of a small cover into the URL of the full size cover by
    dropping the "small/" path segment and switching the extension to jpg:

        http://www.ozon.ru/multimedia/books_covers/small/1002986468.gif
        http://www.ozon.ru/multimedia/books_covers/1002986468.jpg

    URLs that do not follow this scheme are returned unchanged.
    '''
    small_cover = re.match(r'^(.+\/)small\/(.+\.).+$', coverUrl)
    if small_cover is None:
        return coverUrl
    folder, stem = small_cover.groups()
    return folder + stem + 'jpg'
# }}}
|
||||||
|
|
||||||
|
def _get_affiliateId(): # {{{
    '''
    Pick the affiliate id for a book URL: Kovid's id for 3 of the 10 equally
    likely draws (30% of the time), the plugin author's id otherwise.
    '''
    from random import randint

    if randint(1, 10) in (1, 2, 3):
        return 'kovidgoyal'
    return 'romuk'
# }}}
|
||||||
|
|
||||||
|
# Splits a normalized ISBN (digits, optional trailing X, no separators) into
# its parts so that it can be hyphenated. Capture groups, in order:
#   1. GS1 prefix (only present for ISBN13)
#   2. group identifier, always 5
#   3. publisher code, 2 to 7 digits depending on its leading digits
#   4. title number
#   5. check digit
# For now only Russian ISBNs are supported, see
# http://ru.wikipedia.org/wiki/ISBN_российских_издательств
isbn_pat = re.compile(r"""
^
(\d{3})? # match GS1 Prefix for ISBN13
(5) # group identifier for rRussian-speaking countries
( # begin variable length for Publisher
[01]\d{1}| # 2x
[2-6]\d{2}| # 3x
7\d{3}| # 4x (starting with 7)
8[0-4]\d{2}| # 4x (starting with 8)
9[2567]\d{2}| # 4x (starting with 9)
99[26]\d{1}| # 4x (starting with 99)
8[5-9]\d{3}| # 5x (starting with 8)
9[348]\d{3}| # 5x (starting with 9)
900\d{2}| # 5x (starting with 900)
91[0-8]\d{2}| # 5x (starting with 91)
90[1-9]\d{3}| # 6x (starting with 90)
919\d{3}| # 6x (starting with 919)
99[^26]\d{4} # 7x (starting with 99)
) # end variable length for Publisher
(\d+) # Title
([\dX]) # Check digit
$
""", re.VERBOSE)
|
||||||
|
|
||||||
|
def _format_isbn(log, isbn): # {{{
    '''
    Return ``isbn`` validated by check_isbn() and, when it matches
    ``isbn_pat``, hyphenated into its parts.

    A valid ISBN that does not match the pattern is returned without hyphens
    and an error is logged; an invalid one yields check_isbn()'s falsy
    result.
    '''
    checked = check_isbn(isbn)
    if not checked:
        return checked
    parts = isbn_pat.match(checked)
    if parts is None:
        log.error('cannot format isbn %s'%isbn)
        return checked
    # the GS1 prefix group is empty for ISBN10 and must be left out
    return '-'.join([g for g in parts.groups() if g])
# }}}
|
||||||
|
|
||||||
|
def _translageLanguageToCode(displayLang): # {{{
    '''
    Map the Russian display name of a language to its two letter code.

    A missing (falsy) name means Russian; an unknown name yields None.
    '''
    if displayLang:
        lookup = unicode(displayLang).strip()
    else:
        lookup = None
    codes = {
        None: 'ru',
        u'Немецкий': 'de',
        u'Английский': 'en',
        u'Французский': 'fr',
        u'Итальянский': 'it',
        u'Испанский': 'es',
        u'Китайский': 'zh',
        u'Японский': 'ja',
    }
    if lookup in codes:
        return codes[lookup]
    return None
# }}}
|
||||||
|
|
||||||
|
if __name__ == '__main__': # tests {{{
    # To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/ozon.py
    # Comment out some of the touched_fields before running these tests.
    # The queries below go to the live OZON.ru service.
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, isbn_test)


    # Each case is a pair: (keyword arguments for identify, checks that the
    # results have to pass).
    test_identify_plugin(Ozon.name,
        [

            (
                # ISBN only
                {'identifiers':{'isbn': '9785916572629'} },
                [title_test(u'На все четыре стороны', exact=True),
                 authors_test([u'А. А. Гилл'])]
            ),
            (
                # title and author in Latin script
                {'identifiers':{}, 'title':u'Der Himmel Kennt Keine Gunstlinge',
                    'authors':[u'Erich Maria Remarque']},
                [title_test(u'Der Himmel Kennt Keine Gunstlinge', exact=True),
                 authors_test([u'Erich Maria Remarque'])]
            ),
            (
                # title and author in Cyrillic script
                {'identifiers':{ }, 'title':u'Метро 2033',
                    'authors':[u'Дмитрий Глуховский']},
                [title_test(u'Метро 2033', exact=False)]
            ),
            (
                # ISBN13 together with title and author
                {'identifiers':{'isbn': '9785170727209'}, 'title':u'Метро 2033',
                    'authors':[u'Дмитрий Глуховский']},
                [title_test(u'Метро 2033', exact=True),
                    authors_test([u'Дмитрий Глуховский']),
                    isbn_test('9785170727209')]
            ),
            (
                # hyphenated ISBN10 together with title and author
                {'identifiers':{'isbn': '5-699-13613-4'}, 'title':u'Метро 2033',
                    'authors':[u'Дмитрий Глуховский']},
                [title_test(u'Метро 2033', exact=True),
                 authors_test([u'Дмитрий Глуховский'])]
            ),
            (
                # partial title and surname only
                {'identifiers':{}, 'title':u'Метро',
                    'authors':[u'Глуховский']},
                [title_test(u'Метро', exact=False)]
            ),
    ])
# }}}
|
@ -50,6 +50,7 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
|
|||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\
|
search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\
|
||||||
'searchText=%s&searchContext=ebook' % urllib2.quote(query)
|
'searchText=%s&searchContext=ebook' % urllib2.quote(query)
|
||||||
|
xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
br = browser()
|
br = browser()
|
||||||
@ -60,17 +61,14 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
|
|||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
|
|
||||||
|
|
||||||
s = SearchResult()
|
s = SearchResult()
|
||||||
s.detail_item = data.xpath(xp_template.format('ID'))
|
s.detail_item = data.xpath(xp_template.format('ID'))
|
||||||
s.title = data.xpath(xp_template.format('Name'))
|
s.title = data.xpath(xp_template.format('Name'))
|
||||||
s.author = data.xpath(xp_template.format('Author'))
|
s.author = data.xpath(xp_template.format('Author'))
|
||||||
s.price = data.xpath(xp_template.format('Price'))
|
s.price = data.xpath(xp_template.format('Price'))
|
||||||
s.cover_url = data.xpath(xp_template.format('Picture'))
|
s.cover_url = data.xpath(xp_template.format('Picture'))
|
||||||
if re.match("^\d+?\.\d+?$", s.price):
|
s.price = format_price_in_RUR(s.price)
|
||||||
s.price = u'{:.2F} руб.'.format(float(s.price))
|
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
def get_details(self, search_result, timeout=60):
|
def get_details(self, search_result, timeout=60):
|
||||||
@ -97,7 +95,22 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
|
|||||||
# unfortunately no direct links to download books (only buy link)
|
# unfortunately no direct links to download books (only buy link)
|
||||||
# search_result.downloads['BF2'] = self.shop_url + '/order/digitalorder.aspx?id=' + + urllib2.quote(search_result.detail_item)
|
# search_result.downloads['BF2'] = self.shop_url + '/order/digitalorder.aspx?id=' + + urllib2.quote(search_result.detail_item)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def format_price_in_RUR(price):
    '''
    Try to format price according ru locale: '12 212,34 руб.'

    Thousands are separated by a space, the decimal separator is a comma and
    the amount is rounded to two decimals.

    @param price: price in format like 25.99
    @return: formatted price if possible otherwise original value
    @rtype: unicode
    '''
    # only plain decimal numbers with a decimal point are reformatted
    if price and re.match(r'^\d*\.\d*$', price):
        try:
            formatted = u'{:,.2F} руб.'.format(float(price))
        except ValueError:
            # the pattern also admits a lone '.', which is not a number
            return price
        # swap to Russian separators; only the first '.' is the decimal
        # point, the second one belongs to the 'руб.' suffix
        return formatted.replace(',', ' ').replace('.', ',', 1)
    return price
|
||||||
|
|
||||||
def _parse_ebook_formats(formatsStr):
|
def _parse_ebook_formats(formatsStr):
|
||||||
'''
|
'''
|
||||||
Creates a list with displayable names of the formats
|
Creates a list with displayable names of the formats
|
||||||
|
81
src/calibre/gui2/store/stores/xinxii_plugin.py
Normal file
81
src/calibre/gui2/store/stores/xinxii_plugin.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
from contextlib import closing
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import browser
|
||||||
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
|
||||||
|
class XinXiiStore(BasicStoreConfig, OpenSearchOPDSStore):

    open_search_url = 'http://www.xinxii.com/catalog-search/'
    web_url = 'http://xinxii.com/'

    # http://www.xinxii.com/catalog/

    def search(self, query, max_results=10, timeout=60):
        '''
        Yield up to ``max_results`` SearchResult objects for ``query``.

        XinXii's open search url is:
        http://www.xinxii.com/catalog-search/query/?keywords={searchTerms}&pw={startPage?}&doc_lang={docLang}&ff={docFormat},{docFormat},{docFormat}

        This url requires the docLang and docFormat. However, the search itself
        sent to XinXii does not require them. They can be ignored. We cannot
        push this into the standard OpenSearchOPDSStore search because of the
        required attributes.

        XinXii doesn't return all info supported by OpenSearchOPDSStore search
        function so this one is modified to remove parts that are not used.
        '''

        url = 'http://www.xinxii.com/catalog-search/query/?keywords=' + urllib.quote_plus(query)

        br = browser()
        # The feed is read completely here, so the response can be closed
        # before any result is yielded instead of staying open for as long
        # as the caller takes to consume the generator.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = etree.fromstring(f.read())

        counter = max_results
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                link_type = link.get('type')

                # links lacking any of the three attributes are ignored
                if rel and href and link_type:
                    if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
                        s.cover_url = href
                    if rel == 'alternate':
                        # prefer the alternate link over the entry id
                        s.detail_item = href

            # the feed carries no format information
            s.formats = 'EPUB, PDF'

            s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

            price_e = data.xpath('.//*[local-name() = "price"][1]')
            if price_e:
                price_e = price_e[0]
                currency_code = price_e.get('currencycode', '')
                price = ''.join(price_e.xpath('.//text()')).strip()
                s.price = currency_code + ' ' + price
                s.price = s.price.strip()

            yield s
|
@ -640,6 +640,7 @@ class LibraryPage(QWizardPage, LibraryUI):
|
|||||||
metadata_plugins = {
|
metadata_plugins = {
|
||||||
'zh' : ('Douban Books',),
|
'zh' : ('Douban Books',),
|
||||||
'fr' : ('Nicebooks',),
|
'fr' : ('Nicebooks',),
|
||||||
|
'ru' : ('OZON.ru',),
|
||||||
}.get(lang, [])
|
}.get(lang, [])
|
||||||
from calibre.customize.ui import enable_plugin
|
from calibre.customize.ui import enable_plugin
|
||||||
for name in metadata_plugins:
|
for name in metadata_plugins:
|
||||||
|
@ -360,7 +360,7 @@ When you first run |app|, it will ask you for a folder in which to store your bo
|
|||||||
|
|
||||||
Metadata about the books is stored in the file ``metadata.db`` at the top level of the library folder. This file is a sqlite database. When backing up your library make sure you copy the entire folder and all its sub-folders.
|
Metadata about the books is stored in the file ``metadata.db`` at the top level of the library folder. This file is a sqlite database. When backing up your library make sure you copy the entire folder and all its sub-folders.
|
||||||
|
|
||||||
The library folder and all its contents make up what is called a *|app| library*. You can have multiple such libraries. To manage the libraries, click the |app| icon on the toolbar. You can create new libraries, remove/rename existing ones and switch between libraries easily.
|
The library folder and all its contents make up what is called a |app| library. You can have multiple such libraries. To manage the libraries, click the |app| icon on the toolbar. You can create new libraries, remove/rename existing ones and switch between libraries easily.
|
||||||
|
|
||||||
You can copy or move books between different libraries (once you have more than one library setup) by right clicking on a book and selecting the :guilabel:`Copy to library` action.
|
You can copy or move books between different libraries (once you have more than one library setup) by right clicking on a book and selecting the :guilabel:`Copy to library` action.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user