diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 53d336a23d..286bcee9d0 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -710,7 +710,8 @@ class Metadata(object): fmt('Title sort', self.title_sort) if self.authors: fmt('Author(s)', authors_to_string(self.authors) + \ - ((' [' + self.author_sort + ']') if self.author_sort else '')) + ((' [' + self.author_sort + ']') + if self.author_sort and self.author_sort != _('Unknown') else '')) if self.publisher: fmt('Publisher', self.publisher) if getattr(self, 'book_producer', False): diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py index fa9951c40c..ecec13662f 100644 --- a/src/calibre/ebooks/metadata/sources/ozon.py +++ b/src/calibre/ebooks/metadata/sources/ozon.py @@ -11,7 +11,7 @@ import datetime from urllib import quote_plus from Queue import Queue, Empty from lxml import etree, html -from calibre import as_unicode +from calibre import prints, as_unicode from calibre.ebooks.chardet import xml_to_unicode @@ -54,7 +54,8 @@ class Ozon(Source): def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ # div_book -> search only books, ebooks and audio books search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText=' - + + # for ozon.ru search we have to format ISBN with '-' isbn = _format_isbn(log, identifiers.get('isbn', None)) # TODO: format isbn! qItems = set([isbn, title]) @@ -64,7 +65,7 @@ class Ozon(Source): qItems.discard('') qItems = map(_quoteString, qItems) - q = ' '.join(qItems).strip() + q = u' '.join(qItems).strip() log.info(u'search string: ' + q) if isinstance(q, unicode): @@ -78,13 +79,13 @@ class Ozon(Source): return search_url # }}} - def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ - identifiers={}, timeout=30): + def identify(self, log, result_queue, abort, title=None, authors=None, + identifiers={}, timeout=30): # {{{ if not self.is_configured(): return query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if not query: - err = 'Insufficient metadata to construct query' + err = u'Insufficient metadata to construct query' log.error(err) return err @@ -109,7 +110,7 @@ class Ozon(Source): # }}} def get_metadata(self, log, entries, title, authors, identifiers): # {{{ - # some book titles have extra charactes like this + # some book titles have extra characters like this # TODO: make a twick reRemoveFromTitle = None #reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]') @@ -160,7 +161,7 @@ class Ozon(Source): mi.source_relevance = i if ensure_metadata_match(mi): metadata.append(mi) - # log.debug(u'added metadata %s %s. '%(mi.title, mi.authors)) + #log.debug(u'added metadata %s %s.'%(mi.title, mi.authors)) else: log.debug(u'skipped metadata %s %s. (does not match the query)'%(mi.title, mi.authors)) return metadata @@ -285,12 +286,12 @@ class Ozon(Source): url = self.get_book_url(metadata.get_identifiers())[2] raw = self.browser.open_novisit(url, timeout=timeout).read() - doc = html.fromstring(raw) + doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0]) xpt_prod_det_at = u'string(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "%s")]/a[1]/@title)' xpt_prod_det_tx = u'substring-after(//div[contains(@class, "product-detail")]//text()[contains(., "%s")], ":")' - # series + # series Серия/Серии xpt = xpt_prod_det_at % u'Сери' # % u'Серия:' series = doc.xpath(xpt) @@ -300,7 +301,7 @@ class Ozon(Source): xpt = u'normalize-space(substring-after(//meta[@name="description"]/@content, "ISBN"))' isbn_str = doc.xpath(xpt) if isbn_str: - all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)] + all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if _verifyISBNIntegrity(log, isbn)] if all_isbns: metadata.all_isbns = all_isbns metadata.isbn = all_isbns[0] @@ -333,10 +334,10 @@ class Ozon(Source): xpt = u'//table[@id="detail_description"]//tr/td' comment_elem = doc.xpath(xpt) if comment_elem: - comments = unicode(etree.tostring(comment_elem[0])) + comments = unicode(etree.tostring(comment_elem[0], encoding=unicode)) if comments: # cleanup root tag, TODO: remove tags like object/embeded - comments = re.sub(r'\A.*?
.epub, .fb2.zip, .pdf
#