Metadata download: Work around edelweiss.com no longer being able to search by author name.

Search only by title and filter the results returned by edelweiss to
include only entries with matching authors.
This commit is contained in:
Kovid Goyal 2014-02-01 12:11:14 +05:30
parent d9806a5e6e
commit fb72c922d6

View File

@ -253,6 +253,9 @@ class Edelweiss(Source):
log.exception('Failed to parse identify results') log.exception('Failed to parse identify results')
return as_unicode(e) return as_unicode(e)
has_isbn = check_isbn(identifiers.get('isbn', None)) is not None
if not has_isbn:
author_tokens = set(x.lower() for x in self.get_author_tokens(authors, only_first_author=True))
for entry in CSSSelect('div.listRow div.listRowMain')(root): for entry in CSSSelect('div.listRow div.listRowMain')(root):
a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "productDetailPage.aspx")]') a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "productDetailPage.aspx")]')
if not a: if not a:
@ -276,6 +279,13 @@ class Edelweiss(Source):
text = astext(div[0]).lower() text = astext(div[0]).lower()
if 'audio' in text or 'mp3' in text: # Audio-book, ignore if 'audio' in text or 'mp3' in text: # Audio-book, ignore
continue continue
if not has_isbn:
# edelweiss returns matches based only on title, so we
# filter by author manually
div = CSSSelect('div.contributor.attGroup')(entry)
entry_authors = set(self.get_author_tokens([x.strip() for x in astext(div[0]).lower().split(',')]))
if not entry_authors.intersection(author_tokens):
continue
entries.append((self._get_book_url(sku), sku)) entries.append((self._get_book_url(sku), sku))
if (not entries and identifiers and title and authors and if (not entries and identifiers and title and authors and
@ -349,9 +359,9 @@ if __name__ == '__main__':
test_identify_plugin, title_test, authors_test, comments_test, pubdate_test) test_identify_plugin, title_test, authors_test, comments_test, pubdate_test)
tests = [ tests = [
( # A title and author search ( # A title and author search
{'title': 'Flame of Sevenwaters', 'authors':['Juliet Marillier']}, {'title': 'The Husband\'s Secret', 'authors':['Liane Moriarty']},
[title_test('Flame of sevenwaters', exact=True), [title_test('The Husband\'s Secret', exact=True),
authors_test(['Juliet Marillier'])] authors_test(['Liane Moriarty'])]
), ),
( # An isbn present in edelweiss ( # An isbn present in edelweiss