mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
09da88b6d1
commit
aa30f306b5
@ -45,7 +45,6 @@ class OverDrive(Source):
|
|||||||
isbn = identifiers.get('isbn', None)
|
isbn = identifiers.get('isbn', None)
|
||||||
|
|
||||||
br = self.browser
|
br = self.browser
|
||||||
print "in identify, calling to_ovrdrv_data"
|
|
||||||
ovrdrv_data = self.to_ovrdrv_data(br, title, authors, ovrdrv_id)
|
ovrdrv_data = self.to_ovrdrv_data(br, title, authors, ovrdrv_id)
|
||||||
if ovrdrv_data:
|
if ovrdrv_data:
|
||||||
title = ovrdrv_data[8]
|
title = ovrdrv_data[8]
|
||||||
@ -83,7 +82,6 @@ class OverDrive(Source):
|
|||||||
if cached_url is None:
|
if cached_url is None:
|
||||||
log.info('No cached cover found, running identify')
|
log.info('No cached cover found, running identify')
|
||||||
rq = Queue()
|
rq = Queue()
|
||||||
print "inside download cover, calling identify"
|
|
||||||
self.identify(log, rq, abort, title=title, authors=authors,
|
self.identify(log, rq, abort, title=title, authors=authors,
|
||||||
identifiers=identifiers)
|
identifiers=identifiers)
|
||||||
if abort.is_set():
|
if abort.is_set():
|
||||||
@ -110,7 +108,6 @@ class OverDrive(Source):
|
|||||||
ovrdrv_id = identifiers.get('overdrive', None)
|
ovrdrv_id = identifiers.get('overdrive', None)
|
||||||
br = self.browser
|
br = self.browser
|
||||||
referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
|
referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
|
||||||
print "downloading cover, referer is "+str(referer)
|
|
||||||
req = mechanize.Request(cached_url)
|
req = mechanize.Request(cached_url)
|
||||||
req.add_header('referer', referer)
|
req.add_header('referer', referer)
|
||||||
log('Downloading cover from:', cached_url)
|
log('Downloading cover from:', cached_url)
|
||||||
@ -124,7 +121,6 @@ class OverDrive(Source):
|
|||||||
def get_cached_cover_url(self, identifiers): # {{{
|
def get_cached_cover_url(self, identifiers): # {{{
|
||||||
url = None
|
url = None
|
||||||
ovrdrv_id = identifiers.get('overdrive', None)
|
ovrdrv_id = identifiers.get('overdrive', None)
|
||||||
print "inside get_cached_cover_url, ovrdrv_id is "+str(ovrdrv_id)
|
|
||||||
if ovrdrv_id is None:
|
if ovrdrv_id is None:
|
||||||
isbn = identifiers.get('isbn', None)
|
isbn = identifiers.get('isbn', None)
|
||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
@ -217,14 +213,9 @@ class OverDrive(Source):
|
|||||||
q_init_search = q+'SearchResults.aspx'
|
q_init_search = q+'SearchResults.aspx'
|
||||||
# get first author as string - convert this to a proper cleanup function later
|
# get first author as string - convert this to a proper cleanup function later
|
||||||
s = Source(None)
|
s = Source(None)
|
||||||
print "printing list with author "+str(author)+":"
|
|
||||||
author_tokens = list(s.get_author_tokens(author))
|
author_tokens = list(s.get_author_tokens(author))
|
||||||
print list(author_tokens)
|
|
||||||
title_tokens = list(s.get_title_tokens(title, False, True))
|
title_tokens = list(s.get_title_tokens(title, False, True))
|
||||||
print "there are "+str(len(title_tokens))+" title tokens"
|
|
||||||
for token in title_tokens:
|
|
||||||
print "cleaned up title token is: "+str(token)
|
|
||||||
|
|
||||||
if len(title_tokens) >= len(author_tokens):
|
if len(title_tokens) >= len(author_tokens):
|
||||||
initial_q = ' '.join(title_tokens)
|
initial_q = ' '.join(title_tokens)
|
||||||
xref_q = '+'.join(author_tokens)
|
xref_q = '+'.join(author_tokens)
|
||||||
@ -232,8 +223,6 @@ class OverDrive(Source):
|
|||||||
initial_q = ' '.join(author_tokens)
|
initial_q = ' '.join(author_tokens)
|
||||||
xref_q = '+'.join(title_tokens)
|
xref_q = '+'.join(title_tokens)
|
||||||
|
|
||||||
print "initial query is "+str(initial_q)
|
|
||||||
print "cross reference query is "+str(xref_q)
|
|
||||||
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
||||||
query = '{"szKeyword":"'+initial_q+'"}'
|
query = '{"szKeyword":"'+initial_q+'"}'
|
||||||
|
|
||||||
@ -242,7 +231,6 @@ class OverDrive(Source):
|
|||||||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||||||
br.open_novisit(req, query)
|
br.open_novisit(req, query)
|
||||||
|
|
||||||
print "q_init_search is "+q_init_search
|
|
||||||
# initiate the search without messing up the cookiejar
|
# initiate the search without messing up the cookiejar
|
||||||
self.safe_query(br, q_init_search)
|
self.safe_query(br, q_init_search)
|
||||||
|
|
||||||
@ -254,7 +242,6 @@ class OverDrive(Source):
|
|||||||
xreq.add_header('Referer', q_init_search)
|
xreq.add_header('Referer', q_init_search)
|
||||||
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
||||||
raw = br.open_novisit(xreq).read()
|
raw = br.open_novisit(xreq).read()
|
||||||
print "overdrive search result is:\n"+raw
|
|
||||||
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
|
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
|
||||||
if int(m.group('displayrecords')) >= 1:
|
if int(m.group('displayrecords')) >= 1:
|
||||||
results = True
|
results = True
|
||||||
@ -264,54 +251,40 @@ class OverDrive(Source):
|
|||||||
elif int(m.group('totalrecords')) == 0:
|
elif int(m.group('totalrecords')) == 0:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
print "\n\nsorting results"
|
|
||||||
return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
|
return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
|
||||||
|
|
||||||
|
|
||||||
def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
||||||
print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
|
|
||||||
close_matches = []
|
close_matches = []
|
||||||
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
||||||
results = eval(raw)
|
results = eval(raw)
|
||||||
print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
|
|
||||||
#print results
|
|
||||||
# The search results are either from a keyword search or a multi-format list from a single ID,
|
# The search results are either from a keyword search or a multi-format list from a single ID,
|
||||||
# sort through the results for closest match/format
|
# sort through the results for closest match/format
|
||||||
if results:
|
if results:
|
||||||
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
|
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
|
||||||
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
|
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
|
||||||
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
|
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
|
||||||
print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
|
|
||||||
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
|
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
|
||||||
print "overdrive id is not None, searching based on format type priority"
|
|
||||||
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
||||||
else:
|
else:
|
||||||
creators = creators.split(', ')
|
creators = creators.split(', ')
|
||||||
print "split creators from results are: "+str(creators)
|
|
||||||
# if an exact match in a preferred format occurs
|
# if an exact match in a preferred format occurs
|
||||||
if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
|
if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
|
||||||
print "Got Exact Match!!!"
|
|
||||||
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
|
||||||
else:
|
else:
|
||||||
close_title_match = False
|
close_title_match = False
|
||||||
close_author_match = False
|
close_author_match = False
|
||||||
print "format id is "+str(formatid)
|
|
||||||
for token in title_tokens:
|
for token in title_tokens:
|
||||||
print "attempting to find "+str(token)+" title token"
|
|
||||||
if od_title.lower().find(token.lower()) != -1:
|
if od_title.lower().find(token.lower()) != -1:
|
||||||
print "matched token"
|
|
||||||
close_title_match = True
|
close_title_match = True
|
||||||
else:
|
else:
|
||||||
print "token didn't match"
|
|
||||||
close_title_match = False
|
close_title_match = False
|
||||||
break
|
break
|
||||||
for token in author_tokens:
|
for token in author_tokens:
|
||||||
print "attempting to find "+str(token)+" author token"
|
|
||||||
if creators[0].lower().find(token.lower()) != -1:
|
if creators[0].lower().find(token.lower()) != -1:
|
||||||
print "matched token"
|
|
||||||
close_author_match = True
|
close_author_match = True
|
||||||
else:
|
else:
|
||||||
print "token didn't match"
|
|
||||||
close_author_match = False
|
close_author_match = False
|
||||||
break
|
break
|
||||||
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
|
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
|
||||||
@ -350,7 +323,6 @@ class OverDrive(Source):
|
|||||||
|
|
||||||
|
|
||||||
def find_ovrdrv_data(self, br, title, author, isbn, ovrdrv_id=None):
|
def find_ovrdrv_data(self, br, title, author, isbn, ovrdrv_id=None):
|
||||||
print "in find_ovrdrv_data, title is "+str(title)+", author is "+str(author)+", overdrive id is "+str(ovrdrv_id)
|
|
||||||
q = base_url
|
q = base_url
|
||||||
if ovrdrv_id is None:
|
if ovrdrv_id is None:
|
||||||
return self.overdrive_search(br, q, title, author)
|
return self.overdrive_search(br, q, title, author)
|
||||||
@ -364,28 +336,22 @@ class OverDrive(Source):
|
|||||||
Takes either a title/author combo or an Overdrive ID. One of these
|
Takes either a title/author combo or an Overdrive ID. One of these
|
||||||
two must be passed to this function.
|
two must be passed to this function.
|
||||||
'''
|
'''
|
||||||
print "starting to_ovrdrv_data"
|
|
||||||
if ovrdrv_id is not None:
|
if ovrdrv_id is not None:
|
||||||
with cache_lock:
|
with cache_lock:
|
||||||
ans = ovrdrv_data_cache.get(ovrdrv_id, None)
|
ans = ovrdrv_data_cache.get(ovrdrv_id, None)
|
||||||
if ans:
|
if ans:
|
||||||
print "inside to_ovrdrv_data, cache lookup successful, ans is "+str(ans)
|
|
||||||
return ans
|
return ans
|
||||||
elif ans is False:
|
elif ans is False:
|
||||||
print "inside to_ovrdrv_data, ans returned False"
|
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
|
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
print "trying to retrieve data, running find_ovrdrv_data"
|
|
||||||
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
|
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
|
||||||
print "ovrdrv_data is "+str(ovrdrv_data)
|
|
||||||
except:
|
except:
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
ovrdrv_data = None
|
ovrdrv_data = None
|
||||||
print "writing results to ovrdrv_data cache"
|
|
||||||
with cache_lock:
|
with cache_lock:
|
||||||
ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
|
ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
|
||||||
|
|
||||||
@ -402,7 +368,6 @@ class OverDrive(Source):
|
|||||||
publisher[5], creators[6], reserveid[7], title[8]]
|
publisher[5], creators[6], reserveid[7], title[8]]
|
||||||
|
|
||||||
'''
|
'''
|
||||||
print "inside parse_search_results, writing the metadata results"
|
|
||||||
ovrdrv_id = ovrdrv_data[7]
|
ovrdrv_id = ovrdrv_data[7]
|
||||||
mi.set_identifier('overdrive', ovrdrv_id)
|
mi.set_identifier('overdrive', ovrdrv_id)
|
||||||
|
|
||||||
@ -445,7 +410,7 @@ class OverDrive(Source):
|
|||||||
mi.pubdate = parse_date(pub_date[0].strip())
|
mi.pubdate = parse_date(pub_date[0].strip())
|
||||||
if lang:
|
if lang:
|
||||||
mi.language = lang[0].strip()
|
mi.language = lang[0].strip()
|
||||||
print "languages is "+str(mi.language)
|
|
||||||
#if ebook_isbn:
|
#if ebook_isbn:
|
||||||
# print "ebook isbn is "+str(ebook_isbn[0])
|
# print "ebook isbn is "+str(ebook_isbn[0])
|
||||||
# isbn = check_isbn(ebook_isbn[0].strip())
|
# isbn = check_isbn(ebook_isbn[0].strip())
|
||||||
@ -454,7 +419,7 @@ class OverDrive(Source):
|
|||||||
# mi.isbn = isbn
|
# mi.isbn = isbn
|
||||||
if subjects:
|
if subjects:
|
||||||
mi.tags = [tag.strip() for tag in subjects[0].split(',')]
|
mi.tags = [tag.strip() for tag in subjects[0].split(',')]
|
||||||
print "tags are "+str(mi.tags)
|
|
||||||
if desc:
|
if desc:
|
||||||
desc = desc[0]
|
desc = desc[0]
|
||||||
desc = html.tostring(desc, method='html', encoding=unicode).strip()
|
desc = html.tostring(desc, method='html', encoding=unicode).strip()
|
||||||
@ -468,7 +433,6 @@ class OverDrive(Source):
|
|||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
print "running through main tests"
|
|
||||||
import tempfile, os, time
|
import tempfile, os, time
|
||||||
tdir = tempfile.gettempdir()
|
tdir = tempfile.gettempdir()
|
||||||
br = browser()
|
br = browser()
|
||||||
@ -490,19 +454,19 @@ def main(args=sys.argv):
|
|||||||
(None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
|
(None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
|
||||||
]:
|
]:
|
||||||
cpath = os.path.join(tdir, title+'.jpg')
|
cpath = os.path.join(tdir, title+'.jpg')
|
||||||
print "cpath is "+cpath
|
#print "cpath is "+cpath
|
||||||
st = time.time()
|
st = time.time()
|
||||||
curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
|
curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
|
||||||
print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
|
#print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
|
||||||
if curl is None:
|
#if curl is None:
|
||||||
print 'No cover found for', title
|
# print 'No cover found for', title
|
||||||
else:
|
#else:
|
||||||
print "curl is "+curl
|
# print "curl is "+curl
|
||||||
#open(cpath, 'wb').write(br.open_novisit(curl).read())
|
# open(cpath, 'wb').write(br.open_novisit(curl).read())
|
||||||
#print 'Cover for', title, 'saved to', cpath
|
# print 'Cover for', title, 'saved to', cpath
|
||||||
st = time.time()
|
st = time.time()
|
||||||
print get_social_metadata(title, author, isbn, ovrdrv_id)
|
#print get_social_metadata(title, author, isbn, ovrdrv_id)
|
||||||
print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
|
#print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user