From 6f9fff63e03f2392c6c0e646530b5a16e804ffb2 Mon Sep 17 00:00:00 2001 From: Lee Date: Wed, 23 Mar 2011 22:38:29 +0800 Subject: [PATCH] ... --- src/calibre/ebooks/metadata/overdrive.py | 19 +++++++++++++------ src/calibre/ebooks/metadata/sources/base.py | 8 ++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py index e72d168146..61ff2ee7ae 100644 --- a/src/calibre/ebooks/metadata/overdrive.py +++ b/src/calibre/ebooks/metadata/overdrive.py @@ -120,7 +120,7 @@ def overdrive_search(br, q, title, author): #author_q = re.sub('\s', '+', author_q) print "final author query is "+str(author_q) print "final title query is "+str(title_q) - q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=20&sSearch='+title_q + q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+title_q query = '{"szKeyword":"'+author_q+'"}' # main query, requires specific Content Type header @@ -152,11 +152,11 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_ #print results # The search results are either from a keyword search or a multi-format list from a single ID, # sort through the results for closest match/format - for result in results: - print "\n\n\nthis result is "+str(result) + if results: for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \ thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \ availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results: + print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]: print "overdrive id is not None, searching based on format type priority" return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid) @@ -183,11 +183,16 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_ close_author_match = False break if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]: - close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)) + if subtitle and series: + close_matches.insert(0, format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)) + else: + close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)) if close_matches: return close_matches[0] else: - return None + return '' + else: + return '' @@ -394,7 +399,8 @@ def main(args=sys.argv): for ovrdrv_id, isbn, title, author in [ #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author - #(None, '9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2 + #(None, '9780061952838', 'The Two Towers (The Lord of the Rings, Book II)', ['J. R. R. Tolkien']), # Series test, book 2 + #(None, '9780618153985', 'The Fellowship of the Ring (The Lord of the Rings, Part 1)', ['J.R.R. Tolkien']), #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN @@ -405,6 +411,7 @@ def main(args=sys.argv): #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title (None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match + (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover ]: cpath = os.path.join(tdir, title+'.jpg') print "cpath is "+cpath diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 464d08032b..6fc52eb88b 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -102,8 +102,8 @@ class Source(Plugin): if authors: # Leave ' in there for Irish names - remove_pat = re.compile(r'[,:;!@#$%^&*(){}.`~"\s\[\]/]') - replace_pat = re.compile(r'-+') + remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]') + replace_pat = re.compile(r'[-+.:;]') if only_first_author: authors = authors[:1] for au in authors: @@ -128,12 +128,12 @@ class Source(Plugin): subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)') if len(subtitle.sub('', title)) > 1: title = subtitle.sub('', title) - pat = re.compile(r'''([-,:;+!@#$%^&*(){}.`~"\s\[\]/]|'(?!s))''') + pat = re.compile(r'''([-,:;+!@#$%^*(){}.`~"\s\[\]/]|'(?!s))''') title = pat.sub(' ', title) tokens = title.split() for token in tokens: token = token.strip() - if token and token.lower() not in ('a', 'and', 'the') and strip_joiners: + if token and token.lower() not in ('a', 'and', 'the', '&') and strip_joiners: yield token elif token: yield token