mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edelweiss: Work around broken advanced search
Edelweiss metadata download plugin: Workaround for advanced search being broken at the Edelweiss website.
This commit is contained in:
parent
3a9fa00032
commit
af3d990264
@ -154,8 +154,8 @@ class Worker(Thread): # {{{
|
|||||||
# remove all attributes from tags
|
# remove all attributes from tags
|
||||||
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
||||||
# Collapse whitespace
|
# Collapse whitespace
|
||||||
#desc = re.sub('\n+', '\n', desc)
|
# desc = re.sub('\n+', '\n', desc)
|
||||||
#desc = re.sub(' +', ' ', desc)
|
# desc = re.sub(' +', ' ', desc)
|
||||||
# Remove comments
|
# Remove comments
|
||||||
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
|
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
|
||||||
return sanitize_comments_html(desc)
|
return sanitize_comments_html(desc)
|
||||||
@ -239,6 +239,37 @@ class Edelweiss(Source):
|
|||||||
params[k] = v.encode('utf-8')
|
params[k] = v.encode('utf-8')
|
||||||
|
|
||||||
return BASE_URL+urlencode(params)
|
return BASE_URL+urlencode(params)
|
||||||
|
|
||||||
|
def create_query2(self, log, title=None, authors=None, identifiers={}):
    '''
    Build a keyword-search URL for the Edelweiss catalog.

    The Edelweiss advanced search appears to be broken, so the keyword
    search is used instead until it is fixed.

    If check_isbn() returns a value for identifiers['isbn'], that value is
    the only keyword. Otherwise the keywords are the title tokens followed
    by the author tokens (requested with only_first_author=True).

    Returns the query URL, or None when no keywords could be collected.
    '''
    from urllib import urlencode

    # NOTE: identifiers is only read below, never mutated, so sharing the
    # default dict between calls is harmless.
    search_url = 'http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?'
    params = {
        'group':'search',
        'section':'CatalogOverview',
        'searchType':1,
        'searchOrgID':'',
        'searchCatalogID': '',
        'searchMailingID': '',
        'searchSelect':1,
    }

    isbn = check_isbn(identifiers.get('isbn', None))
    if isbn is not None:
        # An ISBN takes precedence: title and authors are ignored.
        terms = [isbn]
    elif title or authors:
        terms = list(self.get_title_tokens(title))
        from_authors = self.get_author_tokens(authors,
                only_first_author=True)
        if from_authors:
            terms.extend(from_authors)
    else:
        terms = []

    if not terms:
        # Nothing to search for; the caller treats a falsy query as
        # insufficient metadata.
        return None

    params['keywords'] = ' '.join(terms).encode('utf-8')
    return search_url + urlencode(params)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||||
@ -251,11 +282,12 @@ class Edelweiss(Source):
|
|||||||
entries = [(book_url, identifiers['edelweiss'])]
|
entries = [(book_url, identifiers['edelweiss'])]
|
||||||
else:
|
else:
|
||||||
entries = []
|
entries = []
|
||||||
query = self.create_query(log, title=title, authors=authors,
|
query = self.create_query2(log, title=title, authors=authors,
|
||||||
identifiers=identifiers)
|
identifiers=identifiers)
|
||||||
if not query:
|
if not query:
|
||||||
log.error('Insufficient metadata to construct query')
|
log.error('Insufficient metadata to construct query')
|
||||||
return
|
return
|
||||||
|
log('Using query URL:', query)
|
||||||
try:
|
try:
|
||||||
raw = br.open_novisit(query, timeout=timeout).read()
|
raw = br.open_novisit(query, timeout=timeout).read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -270,7 +302,8 @@ class Edelweiss(Source):
|
|||||||
|
|
||||||
for entry in CSSSelect('div.listRow div.listRowMain')(root):
|
for entry in CSSSelect('div.listRow div.listRowMain')(root):
|
||||||
a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "ProductDetailPage.aspx")]')
|
a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "ProductDetailPage.aspx")]')
|
||||||
if not a: continue
|
if not a:
|
||||||
|
continue
|
||||||
href = a[0].get('href')
|
href = a[0].get('href')
|
||||||
prefix, qs = href.partition('?')[0::2]
|
prefix, qs = href.partition('?')[0::2]
|
||||||
sku = parse_qs(qs).get('sku', None)
|
sku = parse_qs(qs).get('sku', None)
|
||||||
@ -395,3 +428,5 @@ if __name__ == '__main__':
|
|||||||
test_identify_plugin(Edelweiss.name, tests)
|
test_identify_plugin(Edelweiss.name, tests)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user