Edelweiss: Work around broken advanced search

Edelweiss metadata download plugin: Work around the advanced search being
broken on the Edelweiss website by using the keyword search instead.
This commit is contained in:
Kovid Goyal 2013-07-08 12:52:51 +05:30
parent 3a9fa00032
commit af3d990264

View File

@ -154,8 +154,8 @@ class Worker(Thread): # {{{
# remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Collapse whitespace
#desc = re.sub('\n+', '\n', desc)
#desc = re.sub(' +', ' ', desc)
# desc = re.sub('\n+', '\n', desc)
# desc = re.sub(' +', ' ', desc)
# Remove comments
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
return sanitize_comments_html(desc)
@ -239,6 +239,37 @@ class Edelweiss(Source):
params[k] = v.encode('utf-8')
return BASE_URL+urlencode(params)
def create_query2(self, log, title=None, authors=None, identifiers={}):
    '''
    Build a keyword-search URL for the Edelweiss catalog.

    The Edelweiss advanced search appears to be broken, so the plain
    keyword search is used instead until it is fixed.

    A valid ISBN in ``identifiers`` is used as the only keyword. Otherwise
    the keywords are the title tokens followed by the tokens of the first
    author. ``log`` is accepted but not used here.

    Returns the query URL, or None when no keywords could be derived from
    the supplied metadata.
    '''
    from urllib import urlencode

    search_terms = []
    valid_isbn = check_isbn(identifiers.get('isbn', None))
    if valid_isbn is not None:
        # An ISBN is specific enough on its own; title/authors are ignored.
        search_terms.append(valid_isbn)
    elif title or authors:
        search_terms.extend(list(self.get_title_tokens(title)))
        first_author_tokens = self.get_author_tokens(
            authors, only_first_author=True)
        if first_author_tokens:
            search_terms.extend(first_author_tokens)

    if not search_terms:
        return None

    BASE_URL = 'http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?'
    params = {
        'group':'search',
        'section':'CatalogOverview',
        'searchType':1,
        'searchOrgID':'',
        'searchCatalogID': '',
        'searchMailingID': '',
        'searchSelect':1,
    }
    # Added after the fixed parameters, exactly as before, so the encoded
    # query string keeps the same parameter ordering.
    params['keywords'] = (' '.join(search_terms)).encode('utf-8')
    return BASE_URL + urlencode(params)
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
@ -251,11 +282,12 @@ class Edelweiss(Source):
entries = [(book_url, identifiers['edelweiss'])]
else:
entries = []
query = self.create_query(log, title=title, authors=authors,
query = self.create_query2(log, title=title, authors=authors,
identifiers=identifiers)
if not query:
log.error('Insufficient metadata to construct query')
return
log('Using query URL:', query)
try:
raw = br.open_novisit(query, timeout=timeout).read()
except Exception as e:
@ -270,7 +302,8 @@ class Edelweiss(Source):
for entry in CSSSelect('div.listRow div.listRowMain')(root):
a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "ProductDetailPage.aspx")]')
if not a: continue
if not a:
continue
href = a[0].get('href')
prefix, qs = href.partition('?')[0::2]
sku = parse_qs(qs).get('sku', None)
@ -395,3 +428,5 @@ if __name__ == '__main__':
test_identify_plugin(Edelweiss.name, tests)