Update parsing code according to the new ozon.ru page markup

This commit is contained in:
madhead 2016-09-20 22:13:03 +03:00
parent 7809a6798c
commit 4e9f242fe1
No known key found for this signature in database
GPG Key ID: AA7CF646F07CB399

View File

@@ -297,10 +297,10 @@ class Ozon(Source):
# }}} # }}}
def to_metadata(self, log, entry): # {{{ def to_metadata(self, log, entry): # {{{
title = unicode(entry.xpath(u'normalize-space(.//span[@itemprop="name"][1]/text())')) title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
# log.debug(u'Title: -----> %s' % title) # log.debug(u'Title: -----> %s' % title)
author = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")])')) author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
# log.debug(u'Author: -----> %s' % author) # log.debug(u'Author: -----> %s' % author)
norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u','))) norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))