mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
prioritized results with covers, cleaned up print statements
This commit is contained in:
parent
2b82d49448
commit
7b196c762b
@ -55,9 +55,9 @@ class OverDrive(Source):
|
|||||||
ovrdrv_id = ovrdrv_data[7]
|
ovrdrv_id = ovrdrv_data[7]
|
||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
|
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
|
||||||
|
|
||||||
self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
|
self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
|
||||||
|
|
||||||
result_queue.put(mi)
|
result_queue.put(mi)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@ -144,7 +144,7 @@ class OverDrive(Source):
|
|||||||
if author_tokens:
|
if author_tokens:
|
||||||
q += ('+' if q else '') + build_term('author',
|
q += ('+' if q else '') + build_term('author',
|
||||||
author_tokens)
|
author_tokens)
|
||||||
|
|
||||||
if isinstance(q, unicode):
|
if isinstance(q, unicode):
|
||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
if not q:
|
if not q:
|
||||||
@ -162,7 +162,7 @@ class OverDrive(Source):
|
|||||||
'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
|
'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
|
||||||
]
|
]
|
||||||
return choices[random.randint(0, len(choices)-1)]
|
return choices[random.randint(0, len(choices)-1)]
|
||||||
|
|
||||||
def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
|
def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
|
||||||
fix_slashes = re.compile(r'\\/')
|
fix_slashes = re.compile(r'\\/')
|
||||||
thumbimage = fix_slashes.sub('/', thumbimage)
|
thumbimage = fix_slashes.sub('/', thumbimage)
|
||||||
@ -181,7 +181,7 @@ class OverDrive(Source):
|
|||||||
if m:
|
if m:
|
||||||
series_num = float(m.group(1))
|
series_num = float(m.group(1))
|
||||||
return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
|
return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
|
||||||
|
|
||||||
def safe_query(self, br, query_url, post=''):
|
def safe_query(self, br, query_url, post=''):
|
||||||
'''
|
'''
|
||||||
The query must be initialized by loading an empty search results page
|
The query must be initialized by loading an empty search results page
|
||||||
@ -212,33 +212,29 @@ class OverDrive(Source):
|
|||||||
q_query = q+'default.aspx/SearchByKeyword'
|
q_query = q+'default.aspx/SearchByKeyword'
|
||||||
q_init_search = q+'SearchResults.aspx'
|
q_init_search = q+'SearchResults.aspx'
|
||||||
# get first author as string - convert this to a proper cleanup function later
|
# get first author as string - convert this to a proper cleanup function later
|
||||||
print "printing list with author "+str(author)+":"
|
|
||||||
author_tokens = list(self.get_author_tokens(author,
|
author_tokens = list(self.get_author_tokens(author,
|
||||||
only_first_author=True))
|
only_first_author=True))
|
||||||
print list(author_tokens)
|
title_tokens = list(self.get_title_tokens(title,
|
||||||
title_tokens = list(self.get_title_tokens(title, False, True))
|
strip_joiners=False, strip_subtitle=True))
|
||||||
print "there are "+str(len(title_tokens))+" title tokens"
|
|
||||||
for token in title_tokens:
|
|
||||||
print "cleaned up title token is: "+str(token)
|
|
||||||
|
|
||||||
if len(title_tokens) >= len(author_tokens):
|
if len(title_tokens) >= len(author_tokens):
|
||||||
initial_q = ' '.join(title_tokens)
|
initial_q = ' '.join(title_tokens)
|
||||||
xref_q = '+'.join(author_tokens)
|
xref_q = '+'.join(author_tokens)
|
||||||
else:
|
else:
|
||||||
initial_q = ' '.join(author_tokens)
|
initial_q = ' '.join(author_tokens)
|
||||||
xref_q = '+'.join(title_tokens)
|
xref_q = '+'.join(title_tokens)
|
||||||
|
|
||||||
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
||||||
query = '{"szKeyword":"'+initial_q+'"}'
|
query = '{"szKeyword":"'+initial_q+'"}'
|
||||||
|
|
||||||
# main query, requires specific Content Type header
|
# main query, requires specific Content Type header
|
||||||
req = mechanize.Request(q_query)
|
req = mechanize.Request(q_query)
|
||||||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||||||
br.open_novisit(req, query)
|
br.open_novisit(req, query)
|
||||||
|
|
||||||
# initiate the search without messing up the cookiejar
|
# initiate the search without messing up the cookiejar
|
||||||
self.safe_query(br, q_init_search)
|
self.safe_query(br, q_init_search)
|
||||||
|
|
||||||
# get the search results object
|
# get the search results object
|
||||||
results = False
|
results = False
|
||||||
while results == False:
|
while results == False:
|
||||||
@ -256,16 +252,13 @@ class OverDrive(Source):
|
|||||||
elif int(m.group('totalrecords')) == 0:
|
elif int(m.group('totalrecords')) == 0:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
print "\n\nsorting results"
|
|
||||||
return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
|
return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
|
||||||
|
|
||||||
|
|
||||||
def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
||||||
print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
|
|
||||||
close_matches = []
|
close_matches = []
|
||||||
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
||||||
results = eval(raw)
|
results = eval(raw)
|
||||||
print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
|
|
||||||
#print results
|
#print results
|
||||||
# The search results are either from a keyword search or a multi-format list from a single ID,
|
# The search results are either from a keyword search or a multi-format list from a single ID,
|
||||||
# sort through the results for closest match/format
|
# sort through the results for closest match/format
|
||||||
@ -273,44 +266,36 @@ class OverDrive(Source):
|
|||||||
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
|
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
|
||||||
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
|
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
|
||||||
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
|
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
|
||||||
print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
|
#print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
|
||||||
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
|
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
|
||||||
print "overdrive id is not None, searching based on format type priority"
|
#print "overdrive id is not None, searching based on format type priority"
|
||||||
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
return self.format_results(reserveid, od_title, subtitle, series, publisher,
|
||||||
|
creators, thumbimage, worldcatlink, formatid)
|
||||||
else:
|
else:
|
||||||
creators = creators.split(', ')
|
creators = creators.split(', ')
|
||||||
print "split creators from results are: "+str(creators)+", there are "+str(len(creators))+" total"
|
|
||||||
# if an exact match in a preferred format occurs
|
# if an exact match in a preferred format occurs
|
||||||
if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
|
if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
|
||||||
print "Got Exact Match!!!"
|
return self.format_results(reserveid, od_title, subtitle, series, publisher,
|
||||||
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
creators, thumbimage, worldcatlink, formatid)
|
||||||
else:
|
else:
|
||||||
close_title_match = False
|
close_title_match = False
|
||||||
close_author_match = False
|
close_author_match = False
|
||||||
print "format id is "+str(formatid)
|
|
||||||
for token in title_tokens:
|
for token in title_tokens:
|
||||||
print "attempting to find "+str(token)+" title token"
|
|
||||||
if od_title.lower().find(token.lower()) != -1:
|
if od_title.lower().find(token.lower()) != -1:
|
||||||
print "matched token"
|
|
||||||
close_title_match = True
|
close_title_match = True
|
||||||
else:
|
else:
|
||||||
print "token didn't match"
|
|
||||||
close_title_match = False
|
close_title_match = False
|
||||||
break
|
break
|
||||||
for author in creators:
|
for author in creators:
|
||||||
print "matching tokens for "+str(author)
|
|
||||||
for token in author_tokens:
|
for token in author_tokens:
|
||||||
print "attempting to find "+str(token)+" author token"
|
|
||||||
if author.lower().find(token.lower()) != -1:
|
if author.lower().find(token.lower()) != -1:
|
||||||
print "matched token"
|
|
||||||
close_author_match = True
|
close_author_match = True
|
||||||
else:
|
else:
|
||||||
print "token didn't match"
|
|
||||||
close_author_match = False
|
close_author_match = False
|
||||||
break
|
break
|
||||||
if close_author_match:
|
if close_author_match:
|
||||||
break
|
break
|
||||||
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
|
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900] and thumbimage:
|
||||||
if subtitle and series:
|
if subtitle and series:
|
||||||
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
|
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
|
||||||
else:
|
else:
|
||||||
@ -321,18 +306,18 @@ class OverDrive(Source):
|
|||||||
return ''
|
return ''
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def overdrive_get_record(self, br, q, ovrdrv_id):
|
def overdrive_get_record(self, br, q, ovrdrv_id):
|
||||||
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
|
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
|
||||||
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
|
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
|
||||||
|
|
||||||
# get the base url to set the proper session cookie
|
# get the base url to set the proper session cookie
|
||||||
br.open_novisit(q)
|
br.open_novisit(q)
|
||||||
|
|
||||||
# initialize the search
|
# initialize the search
|
||||||
self.safe_query(br, search_url)
|
self.safe_query(br, search_url)
|
||||||
|
|
||||||
# get the results
|
# get the results
|
||||||
req = mechanize.Request(results_url)
|
req = mechanize.Request(results_url)
|
||||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
@ -385,7 +370,7 @@ class OverDrive(Source):
|
|||||||
'''
|
'''
|
||||||
Parse the formatted search results from the initial Overdrive query and
|
Parse the formatted search results from the initial Overdrive query and
|
||||||
add the values to the metadta.
|
add the values to the metadta.
|
||||||
|
|
||||||
The list object has these values:
|
The list object has these values:
|
||||||
[cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
|
[cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
|
||||||
publisher[5], creators[6], reserveid[7], title[8]]
|
publisher[5], creators[6], reserveid[7], title[8]]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user