From a81e601428b80f31d1d992d6d78ba88936a8c10d Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Sun, 6 Mar 2011 17:18:54 +0800
Subject: [PATCH 01/30] added initial Overdrive support

---
 src/calibre/customize/builtins.py        |   8 +-
 src/calibre/ebooks/metadata/covers.py    |  34 ++
 src/calibre/ebooks/metadata/fetch.py     |  21 ++
 src/calibre/ebooks/metadata/overdrive.py | 386 +++++++++++++++++++++++
 4 files changed, 445 insertions(+), 4 deletions(-)
 create mode 100644 src/calibre/ebooks/metadata/overdrive.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index cd4c866562..0c71317f8f 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -581,19 +581,19 @@ from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-    KentDistrictLibrary
+    KentDistrictLibrary, Overdrive
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-        AmazonCovers, DoubanCovers
+        AmazonCovers, DoubanCovers, OverdriveCovers
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
-plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, Overdrive,
         KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
         Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
-        NiceBooksCovers]
+        NiceBooksCovers, OverdriveCovers]
 plugins += [
     ComicInput,
     EPUBInput,
diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py
index 15e0a05c1e..280ca077ef 100644
--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@@ -145,6 +145,40 @@ class AmazonCovers(CoverDownload): # {{{
 
 # }}}
 
+class OverdriveCovers(CoverDownload): # {{{
+
+    name = 'overdrive.com covers'
+    description = _('Download covers from Overdrive')
+    author = 'Kovid Goyal'
+
+    # Signal `ans` only for a real hit: get_cover_url() returns None, not an error, on a miss.
+    def has_cover(self, mi, ans, timeout=5.):
+        if not mi.authors or not mi.title:
+            return False
+        from calibre.ebooks.metadata.overdrive import get_cover_url
+        br = browser()
+        try:
+            if get_cover_url(mi.isbn, mi.title, mi.authors, br):  # falsy result => no cover
+                self.debug('cover for', mi.isbn, 'found')
+                ans.set()
+        except Exception, e:
+            self.debug(e)
+
+    def get_covers(self, mi, result_queue, abort, timeout=5.):  # puts one (ok, data-or-error, ...) tuple on result_queue
+        if not mi.isbn:
+            return
+        from calibre.ebooks.metadata.overdrive import get_cover_url
+        br = browser()
+        try:
+            url = get_cover_url(mi.isbn, mi.title, mi.authors, br)
+            cover_data = br.open_novisit(url).read()
+            result_queue.put((True, cover_data, 'jpg', self.name))
+        except Exception, e:
+            result_queue.put((False, self.exception_to_string(e),
+                traceback.format_exc(), self.name))
+
+# }}}
+
 def check_for_cover(mi, timeout=5.): # {{{
     from calibre.customize.ui import cover_sources
     ans = Event()
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 667b4f4d7c..1f584bc107 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -250,6 +250,27 @@ class Amazon(MetadataSource): # {{{
 
     # }}}
 
+class Overdrive(MetadataSource): # {{{
+    # Metadata source backed by calibre.ebooks.metadata.overdrive. NOTE(review): fetch() imports get_metadata, but the overdrive.py added by this patch only defines get_social_metadata -- confirm the intended entry point.
+    name = 'Overdrive'
+    metadata_type = 'social'
+    description = _('Downloads  metadata from the Overdrive library network')
+
+    has_html_comments = True
+
+    def fetch(self):
+        if not self.isbn:
+            return
+        from calibre.ebooks.metadata.overdrive import get_metadata
+        try:
+            self.results = get_metadata(self.title, self.book_author,
+                    self.publisher, self.isbn)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # }}}
+
 class KentDistrictLibrary(MetadataSource): # {{{
 
     name = 'Kent District Library'
diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
new file mode 100644
index 0000000000..ad512579d7
--- /dev/null
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Fetch metadata using Adobe Overdrive
+'''
+import sys, re, random, urllib, mechanize, copy
+from threading import RLock
+
+from lxml import html, etree
+from lxml.html import soupparser
+
+from calibre import browser
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.sources.base import Source
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html
+
+ovrdrv_data_cache = {}
+cover_url_cache = {}
+cache_lock = RLock()
+base_url = 'http://search.overdrive.com/'
+
+def get_base_referer():  # return one of the hard-coded library site URLs, chosen at random
+    library_sites = (
+        'http://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
+        'http://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
+        'http://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
+        'http://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
+        'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/',
+    )
+    return library_sites[random.randrange(len(library_sites))]
+    
+def format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
+    fix_slashes = re.compile(r'\\/')  # undo the "\/" escaping in the URLs of the search response
+    thumbimage = fix_slashes.sub('/', thumbimage)
+    worldcatlink = fix_slashes.sub('/', worldcatlink)
+    cover_url = re.sub(r'(?P<img>(Ima?g(eType-)?))200', r'\g<img>100', thumbimage)  # swap the "200" image variant for "100"
+    social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
+    series_num = ''
+    title = od_title  # default so title is always bound (it was unbound with no series and no subtitle)
+    if not series:
+        if subtitle:
+            title = od_title+': '+subtitle
+    else:
+        m = re.search("([0-9]+$)", subtitle or '')  # trailing digits of the subtitle become the series index
+        if m:
+            series_num = float(m.group(1))
+    return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
+
+def overdrive_search(br, q, title, author):  # returns format_results() output for the first exact title/author match, else None
+    q_query = q+'default.aspx/SearchByKeyword'
+    q_init_search = q+'SearchResults.aspx'
+
+    # query terms
+    author_q = re.sub('\s', '+', author)
+    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=10&sSearch='+author_q
+    query = '{"szKeyword":"'+title+'"}'
+
+    # main query, requires specific Content Type header
+    req = mechanize.Request(q_query)
+    req.add_header('Content-Type', 'application/json; charset=utf-8')
+    br.open_novisit(req, query)
+
+    print "q_init_search is "+q_init_search
+
+    # the query must be initialized by loading an empty search results page
+    # this page attempts to set a cookie that Mechanize doesn't like
+    # so snapshot the current cookies first and install the clean copy again after that request
+    goodcookies = br._ua_handlers['_cookies'].cookiejar
+    clean_cj = mechanize.CookieJar()
+    cookies_to_copy = []
+    for cookie in goodcookies:
+        copied_cookie = copy.deepcopy(cookie)
+        cookies_to_copy.append(copied_cookie)
+    for copied_cookie in cookies_to_copy:
+        clean_cj.set_cookie(copied_cookie)
+
+    br.open_novisit(q_init_search)
+
+    br.set_cookiejar(clean_cj)
+
+    # get the search results object
+    xreq = mechanize.Request(q_xref)
+    xreq.add_header('X-Requested-With', 'XMLHttpRequest')
+    xreq.add_header('Referer', q_init_search)
+    xreq.add_header('Accept', 'application/json, text/javascript, */*')
+    raw = br.open_novisit(xreq).read()
+    print "overdrive search result is:\n"+raw
+    raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
+    results = eval(raw)  # SECURITY(review): eval() on a network response can run arbitrary code; switch to a JSON parser
+    print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
+    print results
+    # The search results are from a keyword search (overdrive's advanced search is broken),
+    # sort through the results for closest match/format (each row is unpacked exactly once)
+    for result in results:
+        print "\n\n\nthis result is "+str(result)
+        reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
+            thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
+            availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 = result
+        creators = creators.split(', ')
+        print "fixed creators are: "+str(creators)
+        # if an exact match occurs
+        if creators[0] == author and od_title == title and int(formatid) in [1, 50, 410, 900]:
+            print "Got Exact Match!!!"
+            return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
+        
+
+def library_search(br, q, title, author):  # scrape one library site; returns [id, cover_url, metadata_url] or None when no format matches
+    q_search = q+'AdvancedSearch.htm'
+    q_query = q+'BANGSearch.dll'
+    br.open(q_search)  # presumably needed to set up the session before posting the query -- TODO confirm
+    # Search for cover with audiobooks lowest priority
+    for format in ['410','50','900','25','425']:
+        query = 'Title='+title+'&Creator='+author+'&Keyword=&ISBN=&Format='+format+'&Language=&Publisher=&Subject=&Award=&CollDate=&PerPage=10&Sort=SortBy%3Dtitle'
+        query = re.sub('\s', '+', query)  # only whitespace is replaced; other characters are sent unescaped
+        #print "search url is "+str(q_search)
+        print "query is "+str(query)
+        raw = br.open(q_query, query).read()
+        #print "raw html is:\n"+str(raw)
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        root = html.fromstring(raw)
+        revs = root.xpath("//img[@class='blackborder']")  # cover images on the result page
+        if revs:
+            #print "revs are "+str(revs)
+            # get the first match, as it's the most likely candidate
+            x = revs[0]
+            id = urllib.unquote(re.sub('.*?/(?P<i>%7B.*?%7D).*', '\g<i>', x.get('src')))  # the URL-encoded {...} segment of the image src
+            curl = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', x.get('src'))  # swap the "200" image variant for "100"
+            murl = root.xpath("//img[@class='blackborder']/parent::*")  # parent elements of the cover images; their href is the metadata link
+            if murl:
+                murl = [y.get('href') for y in murl]
+                print "murl is"+str(murl)
+                murl = q+murl[0]
+            else:
+                print "didn't get metadata URL"  # NOTE(review): murl is still [] on this path
+            print "curl is "+str(curl)+", id is "+str(id)+", murl is "+str(murl)
+            ovrdrv_data = [id, curl, murl]
+            print "revs final are "+str(revs)
+            return ovrdrv_data
+
+
+def find_ovrdrv_data(br, title, author, isbn):  # isbn is accepted but not used here
+    print "in fnd_ovrdrv_data, title is "+str(title)+", author is "+str(author)
+    endpoint = base_url
+    # search.overdrive.* goes through overdrive_search, any other host through library_search
+    on_central_site = re.match('http://search\.overdrive\.', endpoint)
+    search = overdrive_search if on_central_site else library_search
+    return search(br, endpoint, title, author)
+    
+
+
+def to_ovrdrv_data(br, title, author, isbn):  # lookup cached under isbn; returns the search data, or None when nothing was found
+    print "starting to_ovrdrv_data"
+    with cache_lock:
+        ans = ovrdrv_data_cache.get(isbn, None)
+    if ans:
+        print "inside to_ovrdrv_data, ans returned positive, ans is"+str(ans)
+        return ans
+    if ans is False:  # False marks an earlier lookup that found nothing (None means "never tried")
+        print "inside to_ovrdrv_data, ans returned False"
+        return None
+    try:
+        ovrdrv_data = find_ovrdrv_data(br, title, author, isbn)
+        print "ovrdrv_data = "+str(ovrdrv_data)
+    except Exception:  # was a bare except, which also trapped KeyboardInterrupt/SystemExit
+        import traceback
+        traceback.print_exc()
+        ovrdrv_data = None
+
+    with cache_lock:
+        ovrdrv_data_cache[isbn] = ovrdrv_data if ovrdrv_data else False
+    return ovrdrv_data
+
+
+def get_social_metadata(title, authors, publisher, isbn):  # always returns a Metadata object; publisher is not used
+    author = authors[0]  # NOTE(review): only referenced by the commented-out xisbn code below
+    mi = Metadata(title, authors)
+    if not isbn:
+        return mi
+    isbn = check_isbn(isbn)
+    if not isbn:
+        return mi
+    br = browser()
+    ovrdrv_data = to_ovrdrv_data(br, title, authors, isbn)  # NOTE(review): passes the whole authors list, not `author`
+    if ovrdrv_data and get_metadata_detail_ovrdrv(br, ovrdrv_data, mi):  # NOTE(review): get_metadata_detail_ovrdrv is not defined in this file; only get_metadata_detail is
+        return mi
+    #from calibre.ebooks.metadata.xisbn import xisbn
+    #for i in xisbn.get_associated_isbns(isbn):
+    #    print "xisbn isbn is "+str(i)
+    #    ovrdrv_data = to_ovrdrv_data(br, title, author, i)
+    #    if ovrdrv_data and get_metadata_detail(br, ovrdrv_data, mi):
+    #        return mi
+    return mi
+
+def get_cover_url(isbn, title, author, br):  # returns a cover URL, or None when no match is found; results are cached under isbn
+    print "starting get_cover_url"
+    isbn = check_isbn(isbn)
+    print "isbn is "+str(isbn)
+    print "title is "+str(title)
+    print "author is "+str(author[0])
+    cleanup = Source()
+    author = cleanup.get_author_tokens(author)
+    print "cleansed author is "+str(author)
+
+    with cache_lock:
+        ans = cover_url_cache.get(isbn, None)
+    if ans:
+        print "ans returned positive"
+        return ans
+    if ans is False:  # False marks an earlier lookup that found no cover
+        print "ans returned false"
+        return None
+    print "in get_cover_url, running through ovrdrv_data function"
+    ovrdrv_data = to_ovrdrv_data(br, title, author, isbn)
+    print "ovrdrv_id is "+str(ovrdrv_data)
+    if ovrdrv_data:
+        ans = ovrdrv_data[0]  # element 0 is cover_url in the list built by format_results()
+        print "inside get_cover_url, ans is "+str(ans)
+        if ans:
+            with cache_lock:
+                cover_url_cache[isbn] = ans
+            return ans
+    #from calibre.ebooks.metadata.xisbn import xisbn
+    #for i in xisbn.get_associated_isbns(isbn):
+    #    print "in get_cover_url, using xisbn list to associate other books"
+    #    ovrdrv_data = to_ovrdrv_data(br, title, author, i)
+    #    if ovrdrv_data:
+    #        ans = _get_cover_url(br, ovrdrv_data)
+    #        if ans:
+    #            with cache_lock:
+    #                cover_url_cache[isbn] = ans
+    #                cover_url_cache[i] = ans
+    #            return ans
+    with cache_lock:
+        cover_url_cache[isbn] = False
+    return None
+
+def _get_cover_url(br, ovrdrv_data):  # fetch the page at ovrdrv_data[1] and derive a cover URL from its prodImage <img>
+    q = ovrdrv_data[1]
+    try:
+        raw = br.open_novisit(q).read()
+    except Exception, e:
+        if callable(getattr(e, 'getcode', None)) and \
+                e.getcode() == 404:
+            return None
+        raise
+    if '<title>404 - ' in raw:  # error page returned in the body instead of an HTTP 404
+        return None
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    try:
+        root = soupparser.fromstring(raw)
+    except Exception:  # was a bare except; an unparseable page still yields False as before
+        return False
+
+    imgs = root.xpath('//img[@id="prodImage" and @src]')
+    if imgs:
+        src = imgs[0].get('src')
+        parts = src.split('/')
+        if len(parts) > 3:
+            bn = parts[-1]
+            sparts = bn.split('_')
+            if len(sparts) > 2:
+                bn = sparts[0] + sparts[-1]  # keep the first and last "_"-separated pieces of the file name
+                return ('/'.join(parts[:-1]))+'/'+bn
+    return None
+
+
+def get_metadata_detail(br, ovrdrv_data, mi):  # scrape the page at ovrdrv_data[2] into mi; True on success, False on 404 or unparseable page
+    q = ovrdrv_data[2]
+    try:
+        raw = br.open_novisit(q).read()
+    except Exception, e:
+        if callable(getattr(e, 'getcode', None)) and \
+                e.getcode() == 404:
+            return False
+        raise
+    if '<title>404 - ' in raw:  # error page returned in the body instead of an HTTP 404
+        return False
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
+
+    # Check for series name and retrieve it (taken from an inline script that assigns szSeries)
+    series_name = root.xpath("//td/script[re:test(text(), 'szSeries', 'i')]", 
+                           namespaces={"re": "http://exslt.org/regular-expressions"})
+    if series_name:
+        series = html.tostring(series_name[0], method='html', encoding=unicode).strip()
+        series = re.sub('(?s).*?szSeries\s*=\s*\"(?P<series>.*?)\";.*', '\g<series>', series)
+        if len(series) > 1:
+            mi.series = series
+            # If series was successful attempt to get the series number
+            series_num = root.xpath("//div/strong[re:test(text(), ',\s(Book|Part|Volume)')]", 
+                                  namespaces={"re": "http://exslt.org/regular-expressions"})
+            if series_num:
+                series_num = float(re.sub('(?s).*?,\s*(Book|Part|Volume)\s*(?P<num>\d+).*', '\g<num>', 
+                                 etree.tostring(series_num[0])))  # NOTE(review): float() raises ValueError if the pattern does not match
+                if series_num >= 1:
+                    mi.series_index = series_num
+            print "series_num is "+str(series_num)
+
+    desc = root.xpath("//td[@class='collection' and re:test(., 'Description', 'i')]/following::div[1]", 
+                    namespaces={"re": "http://exslt.org/regular-expressions"})
+    if desc:
+        desc = desc[0]
+        desc = html.tostring(desc, method='html', encoding=unicode).strip()
+        # remove all attributes from tags
+        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
+        # Remove comments
+        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
+        mi.comments = sanitize_comments_html(desc)
+
+    publisher = root.xpath("//td/strong[re:test(text(), 'Publisher\:', 'i')]/ancestor::td[1]/following-sibling::td/text()", 
+                         namespaces={"re": "http://exslt.org/regular-expressions"})
+    if publisher:
+        mi.publisher = re.sub('^\s*(?P<pub>.*?)\s*$', '\g<pub>', publisher[0])  # strip surrounding whitespace
+        print "publisher is "+str(mi.publisher)
+
+    lang = root.xpath("//td/strong[re:test(text(), 'Language\(s\):', 'i')]/ancestor::td[1]/following-sibling::td/text()", 
+                    namespaces={"re": "http://exslt.org/regular-expressions"})
+    if lang:
+        mi.language = re.sub('^\s*(?P<lang>.*?)\s*$', '\g<lang>', lang[0])
+        print "languages is "+str(mi.language)    
+
+    isbn = root.xpath("//tr/td[re:test(text(), 'ISBN:', 'i')]/following::td/text()", 
+                    namespaces={"re": "http://exslt.org/regular-expressions"})
+    if isbn:
+        mi.isbn = re.sub('^\s*(?P<isbn>.*?)\s*$', '\g<isbn>', isbn[0])
+        print "ISBN is "+str(mi.isbn)    
+
+    subjects = root.xpath("//td/strong[re:test(text(), 'Subject', 'i')]/ancestor::td[1]/following-sibling::td/a/text()", 
+                        namespaces={"re": "http://exslt.org/regular-expressions"})
+    if subjects:
+        mi.tags = subjects
+        print "tags are "+str(mi.tags) 
+
+    creators = root.xpath("//table/tr/td[re:test(text(), '\s*by', 'i')]/ancestor::tr[1]/td[2]/table/tr/td/a/text()", 
+                        namespaces={"re": "http://exslt.org/regular-expressions"})
+    if creators:
+        print "authors are "+str(creators)
+        mi.authors = creators
+
+    return True
+
+def main(args=sys.argv):  # manual smoke test: looks up a cover URL for each enabled sample book (needs network access); args is unused
+    print "running through main tests"
+    import tempfile, os, time
+    tdir = tempfile.gettempdir()
+    br = browser()
+    for isbn, title, author in [
+            #('0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
+            #('9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
+            #('9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
+            ('9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
+            #('', 'Deluge', ['Anne McCaffrey']) # Empty ISBN
+            #(None, 'On the Road', ['Jack Kerouac']) # Nonetype ISBN
+            ]:
+        cpath = os.path.join(tdir, title+'.jpg')  # only printed; the code that saves the cover here is commented out below
+        print "cpath is "+cpath
+        st = time.time()
+        curl = get_cover_url(isbn, title, author, br)
+        print '\n\n Took ', time.time() - st, ' to get metadata\n\n'
+        if curl is None:
+            print 'No cover found for', title
+        else:
+            print "curl is "+curl
+            #open(cpath, 'wb').write(br.open_novisit(curl).read())
+            #print 'Cover for', title, 'saved to', cpath
+
+        #import time
+
+        #print get_social_metadata(title, author, None, isbn)
+        #print '\n\n', time.time() - st, '\n\n'
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())

From 4e428219c94c05df0597c54aab9849f227928094 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 7 Mar 2011 04:37:47 +0800
Subject: [PATCH 02/30] ...

---
 src/calibre/ebooks/metadata/overdrive.py | 67 +++++++++++++-----------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
index cb9ab2c9c9..5afb875fad 100644
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -24,32 +24,29 @@ cover_url_cache = {}
 cache_lock = RLock()
 base_url = 'http://search.overdrive.com/'
 
-class ContentReserve(Source):
 
-    def create_query(self, title=None, authors=None, identifiers={}):
-        q = ''
-        if title or authors:
-            def build_term(prefix, parts):
-                return ' '.join('in'+prefix + ':' + x for x in parts)
-            title_tokens = list(self.get_title_tokens(title))
-            if title_tokens:
-                q += build_term('title', title_tokens)
-            author_tokens = self.get_author_tokens(authors,
-                    only_first_author=True)
-            if author_tokens:
-                q += ('+' if q else '') + build_term('author',
-                        author_tokens)
 
-        if isinstance(q, unicode):
-            q = q.encode('utf-8')
-        if not q:
-            return None
-        return BASE_URL+urlencode({
-            'q':q,
-            'max-results':20,
-            'start-index':1,
-            'min-viewability':'none',
-            })
+def create_query(self, title=None, authors=None, identifiers={}):
+    q = ''
+    if title or authors:
+        def build_term(prefix, parts):
+            return ' '.join('in'+prefix + ':' + x for x in parts)
+        title_tokens = list(self.get_title_tokens(title))
+        if title_tokens:
+            q += build_term('title', title_tokens)
+        author_tokens = self.get_author_tokens(authors,
+                only_first_author=True)
+        if author_tokens:
+            q += ('+' if q else '') + build_term('author',
+                    author_tokens)
+
+    if isinstance(q, unicode):
+        q = q.encode('utf-8')
+    if not q:
+        return None
+    return BASE_URL+urlencode({
+        'q':q,
+        })
 
 
 def get_base_referer():
@@ -82,9 +79,20 @@ def format_results(reserveid, od_title, subtitle, series, publisher, creators, t
 def overdrive_search(br, q, title, author):
     q_query = q+'default.aspx/SearchByKeyword'
     q_init_search = q+'SearchResults.aspx'
-
+    # get first author as string - convert this to a proper cleanup function later
+    s = Source(None)
+    print "printing list with string:"
+    print list(s.get_author_tokens(['J. R. R. Tolkien']))
+    print "printing list with author "+str(author)+":"
+    print list(s.get_author_tokens(author))
+    author = list(s.get_author_tokens(author))
+    for token in author:
+        print "cleaned up author is: "+str(token)
+    author_q = '+'.join(author)
+    #author_q = separator.join(for x in author)
     # query terms
-    author_q = re.sub('\s', '+', author)
+    #author_q = re.sub('\s', '+', author_q)
+    print "final author query is "+str(author_q)
     q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=10&sSearch='+author_q
     query = '{"szKeyword":"'+title+'"}'
 
@@ -231,9 +239,6 @@ def get_cover_url(isbn, title, author, br):
     print "isbn is "+str(isbn)
     print "title is "+str(title)
     print "author is "+str(author[0])
-    cleanup = ContentReserve()
-    query = cleanup.create_query(author, title)
-    print "cleansed query is "+str(author)
 
     with cache_lock:
         ans = cover_url_cache.get(isbn, None)
@@ -386,8 +391,8 @@ def main(args=sys.argv):
     for isbn, title, author in [
             #('0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
             #('9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
-            #('9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
-            ('9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
+            ('9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
+            #('9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
             #('', 'Deluge', ['Anne McCaffrey']) # Empty ISBN
             #(None, 'On the Road', ['Jack Kerouac']) # Nonetype ISBN
             ]:

From c6a2c8e82e5dcd64f0bfb605b10f3a590eb41a08 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Tue, 22 Mar 2011 13:53:09 +0800
Subject: [PATCH 03/30] further work on the overdrive plugin

---
 src/calibre/ebooks/metadata/covers.py       |   9 +-
 src/calibre/ebooks/metadata/fetch.py        |   6 +-
 src/calibre/ebooks/metadata/overdrive.py    | 386 ++++++++++----------
 src/calibre/ebooks/metadata/sources/base.py |  18 +-
 4 files changed, 214 insertions(+), 205 deletions(-)

diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py
index 9f5958f1ad..6ea292aa93 100644
--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@@ -161,14 +161,7 @@ class OverdriveCovers(CoverDownload): # {{{
     def has_cover(self, mi, ans, timeout=5.):
         if not mi.authors or not mi.title:
             return False
-        from calibre.ebooks.metadata.overdrive import get_cover_url
-        br = browser()
-        try:
-            get_cover_url(mi.isbn, mi.title, mi.authors, br)
-            self.debug('cover for', mi.isbn, 'found')
-            ans.set()
-        except Exception, e:
-            self.debug(e)
+        return True
 
     def get_covers(self, mi, result_queue, abort, timeout=5.):
         if not mi.isbn:
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 1f584bc107..0401ee78c5 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -261,10 +261,10 @@ class Overdrive(MetadataSource): # {{{
     def fetch(self):
         if not self.isbn:
             return
-        from calibre.ebooks.metadata.overdrive import get_metadata
+        from calibre.ebooks.metadata.overdrive import get_social_metadata
         try:
-            self.results = get_metadata(self.title, self.book_author,
-                    self.publisher, self.isbn)
+            self.results = get_social_metadata(self.title, self.book_author, self.isbn)
+            
         except Exception, e:
             self.exception = e
             self.tb = traceback.format_exc()
diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
index 5afb875fad..e72d168146 100644
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -25,13 +25,12 @@ cache_lock = RLock()
 base_url = 'http://search.overdrive.com/'
 
 
-
 def create_query(self, title=None, authors=None, identifiers={}):
     q = ''
     if title or authors:
         def build_term(prefix, parts):
             return ' '.join('in'+prefix + ':' + x for x in parts)
-        title_tokens = list(self.get_title_tokens(title))
+        title_tokens = list(self.get_title_tokens(title, False))
         if title_tokens:
             q += build_term('title', title_tokens)
         author_tokens = self.get_author_tokens(authors,
@@ -58,7 +57,7 @@ def get_base_referer():
         'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
     ]
     return choices[random.randint(0, len(choices)-1)]
-    
+
 def format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
     fix_slashes = re.compile(r'\\/')
     thumbimage = fix_slashes.sub('/', thumbimage)
@@ -67,8 +66,10 @@ def format_results(reserveid, od_title, subtitle, series, publisher, creators, t
     social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
     series_num = ''
     if not series:
-       if subtitle:
-           title = od_title+': '+subtitle
+        if subtitle:
+            title = od_title+': '+subtitle
+        else:
+            title = od_title
     else:
         title = od_title
         m = re.search("([0-9]+$)", subtitle)
@@ -76,36 +77,12 @@ def format_results(reserveid, od_title, subtitle, series, publisher, creators, t
             series_num = float(m.group(1))
     return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
 
-def overdrive_search(br, q, title, author):
-    q_query = q+'default.aspx/SearchByKeyword'
-    q_init_search = q+'SearchResults.aspx'
-    # get first author as string - convert this to a proper cleanup function later
-    s = Source(None)
-    print "printing list with string:"
-    print list(s.get_author_tokens(['J. R. R. Tolkien']))
-    print "printing list with author "+str(author)+":"
-    print list(s.get_author_tokens(author))
-    author = list(s.get_author_tokens(author))
-    for token in author:
-        print "cleaned up author is: "+str(token)
-    author_q = '+'.join(author)
-    #author_q = separator.join(for x in author)
-    # query terms
-    #author_q = re.sub('\s', '+', author_q)
-    print "final author query is "+str(author_q)
-    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=10&sSearch='+author_q
-    query = '{"szKeyword":"'+title+'"}'
-
-    # main query, requires specific Content Type header
-    req = mechanize.Request(q_query)
-    req.add_header('Content-Type', 'application/json; charset=utf-8')
-    br.open_novisit(req, query)
-
-    print "q_init_search is "+q_init_search
-    
-    # the query must be initialized by loading an empty search results page
-    # this page attempts to set a cookie that Mechanize doesn't like
-    # copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
+def safe_query(br, query_url):
+    '''
+    The query must be initialized by loading an empty search results page
+    this page attempts to set a cookie that Mechanize doesn't like
+    copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
+    '''
     goodcookies = br._ua_handlers['_cookies'].cookiejar
     clean_cj = mechanize.CookieJar()
     cookies_to_copy = []
@@ -115,10 +92,46 @@ def overdrive_search(br, q, title, author):
     for copied_cookie in cookies_to_copy:
         clean_cj.set_cookie(copied_cookie)
 
-    br.open_novisit(q_init_search)
+    br.open_novisit(query_url)
     
     br.set_cookiejar(clean_cj)
 
+
+def overdrive_search(br, q, title, author):
+    q_query = q+'default.aspx/SearchByKeyword'
+    q_init_search = q+'SearchResults.aspx'
+    # get first author as string - convert this to a proper cleanup function later
+    s = Source(None)
+    print "printing list with string:"
+    #print list(s.get_author_tokens(['J. R. R. Tolkien']))
+    print "printing list with author "+str(author)+":"
+    print list(s.get_author_tokens(author))
+    author_tokens = list(s.get_author_tokens(author))
+    for token in author_tokens:
+        print "cleaned up author token is: "+str(token)
+    author_q = ' '.join(author_tokens)
+
+    title_tokens = list(s.get_title_tokens(title))
+    for token in title_tokens:
+        print "cleaned up title token is: "+str(token)
+    title_q = '+'.join(title_tokens)
+    #author_q = separator.join(for x in author)
+    # query terms
+    #author_q = re.sub('\s', '+', author_q)
+    print "final author query is "+str(author_q)
+    print "final title query is "+str(title_q)
+    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=20&sSearch='+title_q
+    query = '{"szKeyword":"'+author_q+'"}'
+
+    # main query, requires specific Content Type header
+    req = mechanize.Request(q_query)
+    req.add_header('Content-Type', 'application/json; charset=utf-8')
+    br.open_novisit(req, query)
+
+    print "q_init_search is "+q_init_search
+    # initiate the search without messing up the cookiejar
+    safe_query(br, q_init_search)
+
     # get the search results object
     xreq = mechanize.Request(q_xref)
     xreq.add_header('X-Requested-With', 'XMLHttpRequest')
@@ -126,83 +139,102 @@ def overdrive_search(br, q, title, author):
     xreq.add_header('Accept', 'application/json, text/javascript, */*')
     raw = br.open_novisit(xreq).read()
     print "overdrive search result is:\n"+raw
+    print "\n\nsorting results"
+    return sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
+
+
+def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+    print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
+    close_matches = []
     raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
     results = eval(raw)
     print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
-    print results
-    # The search results are from a keyword search (overdrive's advanced search is broken), 
+    #print results
+    # The search results are either from a keyword search or a multi-format list from a single ID,
     # sort through the results for closest match/format
     for result in results:
         print "\n\n\nthis result is "+str(result)
         for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                 thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                 availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-            creators = creators.split(', ')
-            print "fixed creators are: "+str(creators)
-            # if an exact match occurs
-            if creators[0] == author and od_title == title and int(formatid) in [1, 50, 410, 900]:
-                print "Got Exact Match!!!"
-                return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
-        
-
-def library_search(br, q, title, author):
-    q_search = q+'AdvancedSearch.htm'
-    q_query = q+'BANGSearch.dll'
-    br.open(q_search)
-    # Search for cover with audiobooks lowest priority
-    for format in ['410','50','900','25','425']:
-        query = 'Title='+title+'&Creator='+author+'&Keyword=&ISBN=&Format='+format+'&Language=&Publisher=&Subject=&Award=&CollDate=&PerPage=10&Sort=SortBy%3Dtitle'
-        query = re.sub('\s', '+', query)
-        #print "search url is "+str(q_search)
-        print "query is "+str(query)
-        raw = br.open(q_query, query).read()
-        #print "raw html is:\n"+str(raw)
-        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-                resolve_entities=True)[0]
-        root = html.fromstring(raw)
-        revs = root.xpath("//img[@class='blackborder']")
-        if revs:
-            #print "revs are "+str(revs)
-            # get the first match, as it's the most likely candidate
-            x = revs[0]
-            id = urllib.unquote(re.sub('.*?/(?P<i>%7B.*?%7D).*', '\g<i>', x.get('src')))
-            curl = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', x.get('src'))
-            murl = root.xpath("//img[@class='blackborder']/parent::*")
-            if murl:
-                murl = [y.get('href') for y in murl]
-                print "murl is"+str(murl)
-                murl = q+murl[0]
+            if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
+                print "overdrive id is not None, searching based on format type priority"
+                return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
             else:
-                print "didn't get metadata URL"
-            print "curl is "+str(curl)+", id is "+str(id)+", murl is "+str(murl)
-            ovrdrv_data = [id, curl, murl]
-            print "revs final are "+str(revs)
-            return ovrdrv_data
+                creators = creators.split(', ')
+                print "fixed creators are: "+str(creators)
+                # if an exact match in a preferred format occurs
+                if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
+                    print "Got Exact Match!!!"
+                    return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
+                else:
+                    close_title_match = False
+                    close_author_match = False
+                    for token in title_tokens:
+                        if od_title.lower().find(token.lower()) != -1:
+                            close_title_match = True
+                        else:
+                            close_title_match = False
+                            break
+                    for token in author_tokens:
+                        if creators[0].lower().find(token.lower()) != -1:
+                            close_author_match = True
+                        else:
+                            close_author_match = False
+                            break
+                    if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+                        close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+        if close_matches:
+            return close_matches[0]
+        else:
+            return None
 
 
-def find_ovrdrv_data(br, title, author, isbn):
-    print "in fnd_ovrdrv_data, title is "+str(title)+", author is "+str(author)
+
+def overdrive_get_record(br, q, ovrdrv_id):
+    search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
+    results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
+
+    # get the base url to set the proper session cookie
+    br.open_novisit(q)
+
+    # initialize the search
+    safe_query(br, search_url)
+
+    # get the results
+    req = mechanize.Request(results_url)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', search_url)
+    req.add_header('Accept', 'application/json, text/javascript, */*')
+    raw = br.open_novisit(req)
+    raw = str(list(raw))
+    return sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
+
+
+def find_ovrdrv_data(br, title, author, isbn, ovrdrv_id=None):
+    print "in find_ovrdrv_data, title is "+str(title)+", author is "+str(author)+", overdrive id is "+str(ovrdrv_id)
     q = base_url
-    if re.match('http://search\.overdrive\.', q):
+    if ovrdrv_id is None:
        return overdrive_search(br, q, title, author)
     else:
-       return library_search(br, q, title, author)
-    
+       return overdrive_get_record(br, q, ovrdrv_id)
 
 
-def to_ovrdrv_data(br, title, author, isbn):
+
+def to_ovrdrv_data(br, title, author, isbn, ovrdrv_id=None):
     print "starting to_ovrdrv_data"
     with cache_lock:
         ans = ovrdrv_data_cache.get(isbn, None)
     if ans:
-        print "inside to_ovrdrv_data, ans returned positive, ans is"+str(ans)
+        print "inside to_ovrdrv_data, cache lookup successful, ans is "+str(ans)
         return ans
     if ans is False:
         print "inside to_ovrdrv_data, ans returned False"
         return None
     try:
-        ovrdrv_data = find_ovrdrv_data(br, title, author, isbn)
-        print "ovrdrv_data = "+str(ovrdrv_data)
+        print "trying to retrieve data, running find_ovrdrv_data"
+        ovrdrv_data = find_ovrdrv_data(br, title, author, isbn, ovrdrv_id)
+        print "ovrdrv_data is "+str(ovrdrv_data)
     except:
         import traceback
         traceback.print_exc()
@@ -210,66 +242,69 @@ def to_ovrdrv_data(br, title, author, isbn):
 
     with cache_lock:
         ovrdrv_data_cache[isbn] = ovrdrv_data if ovrdrv_data else False
+    if ovrdrv_data:
+        from calibre.ebooks.metadata.xisbn import xisbn
+        for i in xisbn.get_associated_isbns(isbn):
+            with cache_lock:
+                ovrdrv_data_cache[i] = ovrdrv_data
+
     return ovrdrv_data
 
 
-def get_social_metadata(title, authors, publisher, isbn):
+def get_social_metadata(title, authors, isbn, ovrdrv_id=None):
     author = authors[0]
     mi = Metadata(title, authors)
-    if not isbn:
-        return mi
-    isbn = check_isbn(isbn)
-    if not isbn:
-        return mi
     br = browser()
-    ovrdrv_data = to_ovrdrv_data(br, title, authors, isbn)
-    if ovrdrv_data and get_metadata_detail_ovrdrv(br, ovrdrv_data, mi):
+    print "calling to_ovrdrv_data from inside get_social_metadata"
+    ovrdrv_data = to_ovrdrv_data(br, title, authors, isbn, ovrdrv_id)
+
+    #[cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
+
+    if len(ovrdrv_data[3]) > 1:
+        mi.series = ovrdrv_data[3]
+        if ovrdrv_data[4]:
+            mi.series_index = ovrdrv_data[4]
+    mi.publisher = ovrdrv_data[5]
+    mi.authors = ovrdrv_data[6]
+    if ovrdrv_id is None:
+        ovrdrv_id = ovrdrv_data[7]
+    mi.set_identifier('overdrive', ovrdrv_id)
+    mi.title = ovrdrv_data[8]
+
+    if ovrdrv_data and get_metadata_detail(br, ovrdrv_data[1], mi, isbn):
         return mi
-    #from calibre.ebooks.metadata.xisbn import xisbn
-    #for i in xisbn.get_associated_isbns(isbn):
-    #    print "xisbn isbn is "+str(i)
-    #    ovrdrv_data = to_ovrdrv_data(br, title, author, i)
-    #    if ovrdrv_data and get_metadata_detail(br, ovrdrv_data, mi):
-    #        return mi
     return mi
 
-def get_cover_url(isbn, title, author, br):
+def get_cover_url(isbn, title, author, br, ovrdrv_id=None):
     print "starting get_cover_url"
-    isbn = check_isbn(isbn)
-    print "isbn is "+str(isbn)
     print "title is "+str(title)
     print "author is "+str(author[0])
+    print "isbn is "+str(isbn)
+    print "ovrdrv_id is "+str(ovrdrv_id)
 
     with cache_lock:
         ans = cover_url_cache.get(isbn, None)
+        #ans = cover_url_cache.get(ovrdrv_id, None)
     if ans:
-        print "ans returned positive"
+        print "cover url cache lookup returned positive, ans is "+str(ans)
         return ans
     if ans is False:
-        "ans returned false"
+        "cover url cache lookup returned false"
         return None
-    print "in get_cover_url, running through ovrdrv_data function"
-    ovrdrv_data = to_ovrdrv_data(br, title, author, isbn)
-    print "ovrdrv_id is "+str(ovrdrv_data)
+    print "in get_cover_url, calling to_ovrdrv_data function"
+    ovrdrv_data = to_ovrdrv_data(br, title, author, isbn, ovrdrv_id)
     if ovrdrv_data:
         ans = ovrdrv_data[0]
-        print "inside get_cover_url, ans is "+str(ans)
+        print "inside get_cover_url, got url from to_ovrdrv_data, ans is "+str(ans)
         if ans:
+            print "writing cover url to url cache"
             with cache_lock:
                 cover_url_cache[isbn] = ans
+                #cover_url_cache[ovrdrv_id] = ans
             return ans
-    #from calibre.ebooks.metadata.xisbn import xisbn
-    #for i in xisbn.get_associated_isbns(isbn):
-    #    print "in get_cover_url, using xisbn list to associate other books"
-    #    ovrdrv_data = to_ovrdrv_data(br, title, author, i)
-    #    if ovrdrv_data:
-    #        ans = _get_cover_url(br, ovrdrv_data)
-    #        if ans:
-    #            with cache_lock:
-    #                cover_url_cache[isbn] = ans
-    #                cover_url_cache[i] = ans
-    #            return ans
+            
     with cache_lock:
+        print "marking cover url cache for this isbn false"
         cover_url_cache[isbn] = False
     return None
 
@@ -303,18 +338,14 @@ def _get_cover_url(br, ovrdrv_data):
                 return ('/'.join(parts[:-1]))+'/'+bn
     return None
 
-
-def get_metadata_detail(br, ovrdrv_data, mi):
-    q = ovrdrv_data[2]
+def get_metadata_detail(br, metadata_url, mi, isbn=None):
     try:
-        raw = br.open_novisit(q).read()
+        raw = br.open_novisit(metadata_url).read()
     except Exception, e:
         if callable(getattr(e, 'getcode', None)) and \
                 e.getcode() == 404:
             return False
-        raise
-    if '<title>404 - ' in raw:
-        return False
+        raise   
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
     try:
@@ -322,26 +353,28 @@ def get_metadata_detail(br, ovrdrv_data, mi):
     except:
         return False
 
-    # Check for series name and retrieve it
-    series_name = root.xpath("//td/script[re:test(text(), 'szSeries', 'i')]", 
-                           namespaces={"re": "http://exslt.org/regular-expressions"})
-    if series_name:
-        series = html.tostring(series_name[0], method='html', encoding=unicode).strip()
-        series = re.sub('(?s).*?szSeries\s*=\s*\"(?P<series>.*?)\";.*', '\g<series>', series)
-        if len(series) > 1:
-            mi.series = series
-            # If series was successful attempt to get the series number
-            series_num = root.xpath("//div/strong[re:test(text(), ',\s(Book|Part|Volume)')]", 
-                                  namespaces={"re": "http://exslt.org/regular-expressions"})
-            if series_num:
-                series_num = float(re.sub('(?s).*?,\s*(Book|Part|Volume)\s*(?P<num>\d+).*', '\g<num>', 
-                                 etree.tostring(series_num[0])))
-                if series_num >= 1:
-                    mi.series_index = series_num
-            print "series_num is "+str(series_num)
+    isbn = check_isbn(isbn)
 
-    desc = root.xpath("//td[@class='collection' and re:test(., 'Description', 'i')]/following::div[1]", 
-                    namespaces={"re": "http://exslt.org/regular-expressions"})
+    pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
+    lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
+    subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
+    ebook_isbn = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
+    desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")
+
+    if pub_date:
+        from calibre.utils.date import parse_date
+        mi.pubdate = parse_date(pub_date[0].strip())
+    if lang:
+        mi.language = lang[0].strip()
+        print "languages is "+str(mi.language)
+    if ebook_isbn and isbn is None:
+        print "ebook isbn is "+str(ebook_isbn[0])
+        mi.set_identifier('isbn', ebook_isbn)
+    #elif isbn is not None:
+    #    mi.set_identifier('isbn', isbn)
+    if subjects:
+        mi.tags = subjects
+        print "tags are "+str(mi.tags)
     if desc:
         desc = desc[0]
         desc = html.tostring(desc, method='html', encoding=unicode).strip()
@@ -351,36 +384,6 @@ def get_metadata_detail(br, ovrdrv_data, mi):
         desc = re.sub(r'(?s)<!--.*?-->', '', desc)
         mi.comments = sanitize_comments_html(desc)
 
-    publisher = root.xpath("//td/strong[re:test(text(), 'Publisher\:', 'i')]/ancestor::td[1]/following-sibling::td/text()", 
-                         namespaces={"re": "http://exslt.org/regular-expressions"})
-    if publisher:
-        mi.publisher = re.sub('^\s*(?P<pub>.*?)\s*$', '\g<pub>', publisher[0])
-        print "publisher is "+str(mi.publisher)
-
-    lang = root.xpath("//td/strong[re:test(text(), 'Language\(s\):', 'i')]/ancestor::td[1]/following-sibling::td/text()", 
-                    namespaces={"re": "http://exslt.org/regular-expressions"})
-    if lang:
-        mi.language = re.sub('^\s*(?P<lang>.*?)\s*$', '\g<lang>', lang[0])
-        print "languages is "+str(mi.language)    
-
-    isbn = root.xpath("//tr/td[re:test(text(), 'ISBN:', 'i')]/following::td/text()", 
-                    namespaces={"re": "http://exslt.org/regular-expressions"})
-    if isbn:
-        mi.isbn = re.sub('^\s*(?P<isbn>.*?)\s*$', '\g<isbn>', isbn[0])
-        print "ISBN is "+str(mi.isbn)    
-
-    subjects = root.xpath("//td/strong[re:test(text(), 'Subject', 'i')]/ancestor::td[1]/following-sibling::td/a/text()", 
-                        namespaces={"re": "http://exslt.org/regular-expressions"})
-    if subjects:
-        mi.tags = subjects
-        print "tags are "+str(mi.tags) 
-
-    creators = root.xpath("//table/tr/td[re:test(text(), '\s*by', 'i')]/ancestor::tr[1]/td[2]/table/tr/td/a/text()", 
-                        namespaces={"re": "http://exslt.org/regular-expressions"})
-    if creators:
-        print "authors are "+str(creators)
-        mi.authors = creators
-
     return True
 
 def main(args=sys.argv):
@@ -388,19 +391,26 @@ def main(args=sys.argv):
     import tempfile, os, time
     tdir = tempfile.gettempdir()
     br = browser()
-    for isbn, title, author in [
-            #('0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
-            #('9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
-            ('9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
-            #('9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
-            #('', 'Deluge', ['Anne McCaffrey']) # Empty ISBN
-            #(None, 'On the Road', ['Jack Kerouac']) # Nonetype ISBN
+    for ovrdrv_id, isbn, title, author in [
+            #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
+            #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
+            #(None, '9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
+            #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id
+            #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
+            #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN
+            #(None, None, 'On the Road', ['Jack Kerouac']), # Nonetype ISBN
+            #(None, '9780345435279', 'A Caress of Twilight', ['Laurell K. Hamilton']),
+            #(None, '9780606087230', 'The Omnivore\'s Dilemma : A Natural History of Four Meals', ['Michael Pollan']), # Subtitle colon
+            #(None, '9780061747649', 'Mental_Floss Presents: Condensed Knowledge', ['Will Pearson', 'Mangesh Hattikudur']),
+            #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
+            #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
+            (None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
             ]:
         cpath = os.path.join(tdir, title+'.jpg')
         print "cpath is "+cpath
         st = time.time()
-        curl = get_cover_url(isbn, title, author, br)
-        print '\n\n Took ', time.time() - st, ' to get metadata\n\n'
+        curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
+        print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
         if curl is None:
             print 'No cover found for', title
         else:
@@ -408,9 +418,7 @@ def main(args=sys.argv):
             #open(cpath, 'wb').write(br.open_novisit(curl).read())
             #print 'Cover for', title, 'saved to', cpath
 
-        #import time
-        
-        #print get_social_metadata(title, author, None, isbn)
+        print get_social_metadata(title, author, isbn, ovrdrv_id)
         #print '\n\n', time.time() - st, '\n\n'
 
     return 0
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 55cc996cf7..b600eafaf2 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -87,32 +87,40 @@ class Source(Plugin):
 
         if authors:
             # Leave ' in there for Irish names
-            pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
+            remove_pat = re.compile(r'[,:;!@#$%^&*(){}.`~"\s\[\]/]')
+            replace_pat = re.compile(r'-+')
             if only_first_author:
                 authors = authors[:1]
             for au in authors:
+                au = replace_pat.sub(' ', au)
                 parts = au.split()
                 if ',' in au:
                     # au probably in ln, fn form
                     parts = parts[1:] + parts[:1]
                 for tok in parts:
-                    tok = pat.sub('', tok).strip()
+                    tok = remove_pat.sub('', tok).strip()
                     if len(tok) > 2 and tok.lower() not in ('von', ):
                         yield tok
 
 
-    def get_title_tokens(self, title):
+    def get_title_tokens(self, title, strip_joiners=True):
         '''
         Take a title and return a list of tokens useful for an AND search query.
         Excludes connectives and punctuation.
         '''
         if title:
-            pat = re.compile(r'''[-,:;+!@#$%^&*(){}.`~"'\s\[\]/]''')
+            # strip sub-titles
+            subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
+            if len(subtitle.sub('', title)) > 1:
+                title = subtitle.sub('', title)
+            pat = re.compile(r'''([-,:;+!@#$%^&*(){}.`~"\s\[\]/]|'(?!s))''')
             title = pat.sub(' ', title)
             tokens = title.split()
             for token in tokens:
                 token = token.strip()
-                if token and token.lower() not in ('a', 'and', 'the'):
+                if token and token.lower() not in ('a', 'and', 'the') and strip_joiners:
+                    yield token
+                elif token:
                     yield token
 
     def split_jobs(self, jobs, num):

From 6f9fff63e03f2392c6c0e646530b5a16e804ffb2 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Wed, 23 Mar 2011 22:38:29 +0800
Subject: [PATCH 04/30] ...

---
 src/calibre/ebooks/metadata/overdrive.py    | 19 +++++++++++++------
 src/calibre/ebooks/metadata/sources/base.py |  8 ++++----
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
index e72d168146..61ff2ee7ae 100644
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -120,7 +120,7 @@ def overdrive_search(br, q, title, author):
     #author_q = re.sub('\s', '+', author_q)
     print "final author query is "+str(author_q)
     print "final title query is "+str(title_q)
-    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=20&sSearch='+title_q
+    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+title_q
     query = '{"szKeyword":"'+author_q+'"}'
 
     # main query, requires specific Content Type header
@@ -152,11 +152,11 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_
     #print results
     # The search results are either from a keyword search or a multi-format list from a single ID,
     # sort through the results for closest match/format
-    for result in results:
-        print "\n\n\nthis result is "+str(result)
+    if results:
         for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                 thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                 availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
+            print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
             if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
                 print "overdrive id is not None, searching based on format type priority"
                 return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
@@ -183,11 +183,16 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_
                             close_author_match = False
                             break
                     if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
-                        close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+                        if subtitle and series:
+                            close_matches.insert(0, format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+                        else:
+                            close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
         if close_matches:
             return close_matches[0]
         else:
-            return None
+            return ''
+    else:
+        return ''
 
 
 
@@ -394,7 +399,8 @@ def main(args=sys.argv):
     for ovrdrv_id, isbn, title, author in [
             #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
             #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
-            #(None, '9780061952838', 'The Two Towers', ['J. R. R. Tolkien']), # Series test, book 2
+            #(None, '9780061952838', 'The Two Towers (The Lord of the Rings, Book II)', ['J. R. R. Tolkien']), # Series test, book 2
+            #(None, '9780618153985', 'The Fellowship of the Ring (The Lord of the Rings, Part 1)', ['J.R.R. Tolkien']),
             #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id
             #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
             #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN
@@ -405,6 +411,7 @@ def main(args=sys.argv):
             #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
             #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
             (None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
+            (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
             ]:
         cpath = os.path.join(tdir, title+'.jpg')
         print "cpath is "+cpath
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 464d08032b..6fc52eb88b 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -102,8 +102,8 @@ class Source(Plugin):
 
         if authors:
             # Leave ' in there for Irish names
-            remove_pat = re.compile(r'[,:;!@#$%^&*(){}.`~"\s\[\]/]')
-            replace_pat = re.compile(r'-+')
+            remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
+            replace_pat = re.compile(r'[-+.:;]')
             if only_first_author:
                 authors = authors[:1]
             for au in authors:
@@ -128,12 +128,12 @@ class Source(Plugin):
             subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
             if len(subtitle.sub('', title)) > 1:
                 title = subtitle.sub('', title)
-            pat = re.compile(r'''([-,:;+!@#$%^&*(){}.`~"\s\[\]/]|'(?!s))''')
+            pat = re.compile(r'''([-,:;+!@#$%^*(){}.`~"\s\[\]/]|'(?!s))''')
             title = pat.sub(' ', title)
             tokens = title.split()
             for token in tokens:
                 token = token.strip()
-                if token and token.lower() not in ('a', 'and', 'the') and strip_joiners:
+                if token and token.lower() not in ('a', 'and', 'the', '&') and strip_joiners:
                     yield token
                 elif token:
                     yield token

From 433270f20ead59bc013855d5b1403e43e1f50a02 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 28 Mar 2011 17:24:45 +0800
Subject: [PATCH 05/30] add another type of scene break to the scene break
 formatting logic

---
 src/calibre/ebooks/conversion/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index f1f2f87293..1546644f95 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -764,6 +764,7 @@ class HeuristicProcessor(object):
         # Multiple sequential blank paragraphs are merged with appropriate margins
         # If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
         if getattr(self.extra_opts, 'format_scene_breaks', False):
+            html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
             html = self.detect_whitespace(html)
             html = self.detect_soft_breaks(html)
             blanks_count = len(self.any_multi_blank.findall(html))

From 07733b2fc800fb135bbebbeae33153434b82daf3 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Fri, 1 Apr 2011 12:59:07 +0800
Subject: [PATCH 06/30] overdrive tweaks

---
 src/calibre/ebooks/metadata/overdrive.py | 58 +++++++++++++++---------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
index 61ff2ee7ae..289d6bea0e 100644
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -107,21 +107,27 @@ def overdrive_search(br, q, title, author):
     print "printing list with author "+str(author)+":"
     print list(s.get_author_tokens(author))
     author_tokens = list(s.get_author_tokens(author))
+    print "there are "+str(len(author_tokens))+" author tokens"
     for token in author_tokens:
         print "cleaned up author token is: "+str(token)
-    author_q = ' '.join(author_tokens)
+
 
     title_tokens = list(s.get_title_tokens(title))
+    print "there are "+str(len(title_tokens))+" title tokens"
     for token in title_tokens:
         print "cleaned up title token is: "+str(token)
-    title_q = '+'.join(title_tokens)
-    #author_q = separator.join(for x in author)
-    # query terms
-    #author_q = re.sub('\s', '+', author_q)
-    print "final author query is "+str(author_q)
-    print "final title query is "+str(title_q)
-    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+title_q
-    query = '{"szKeyword":"'+author_q+'"}'
+
+    if len(title_tokens) >= len(author_tokens):
+        initial_q = ' '.join(title_tokens)
+        xref_q = '+'.join(author_tokens)
+    else:
+        initial_q = ' '.join(author_tokens)
+        xref_q = '+'.join(title_tokens)
+
+    print "initial query is "+str(initial_q)
+    print "cross reference query is "+str(xref_q)
+    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
+    query = '{"szKeyword":"'+initial_q+'"}'
 
     # main query, requires specific Content Type header
     req = mechanize.Request(q_query)
@@ -133,12 +139,21 @@ def overdrive_search(br, q, title, author):
     safe_query(br, q_init_search)
 
     # get the search results object
-    xreq = mechanize.Request(q_xref)
-    xreq.add_header('X-Requested-With', 'XMLHttpRequest')
-    xreq.add_header('Referer', q_init_search)
-    xreq.add_header('Accept', 'application/json, text/javascript, */*')
-    raw = br.open_novisit(xreq).read()
-    print "overdrive search result is:\n"+raw
+    results = False
+    while results == False:
+        xreq = mechanize.Request(q_xref)
+        xreq.add_header('X-Requested-With', 'XMLHttpRequest')
+        xreq.add_header('Referer', q_init_search)
+        xreq.add_header('Accept', 'application/json, text/javascript, */*')
+        raw = br.open_novisit(xreq).read()
+        print "overdrive search result is:\n"+raw
+        for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
+            if int(m.group('displayrecords')) >= 1:
+                results = True
+            elif int(m.group('totalrecords')) >= 1:
+                xref_q = ''
+                q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
+        
     print "\n\nsorting results"
     return sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
 
@@ -162,7 +177,7 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_
                 return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
             else:
                 creators = creators.split(', ')
-                print "fixed creators are: "+str(creators)
+                print "split creators from results are: "+str(creators)
                 # if an exact match in a preferred format occurs
                 if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
                     print "Got Exact Match!!!"
@@ -275,9 +290,10 @@ def get_social_metadata(title, authors, isbn, ovrdrv_id=None):
         ovrdrv_id = ovrdrv_data[7]
     mi.set_identifier('overdrive', ovrdrv_id)
     mi.title = ovrdrv_data[8]
-
+    print "populated basic social metadata, getting detailed metadata"
     if ovrdrv_data and get_metadata_detail(br, ovrdrv_data[1], mi, isbn):
         return mi
+    print "failed to get detailed metadata, returning basic info"
     return mi
 
 def get_cover_url(isbn, title, author, br, ovrdrv_id=None):
@@ -378,7 +394,7 @@ def get_metadata_detail(br, metadata_url, mi, isbn=None):
     #elif isbn is not None:
     #    mi.set_identifier('isbn', isbn)
     if subjects:
-        mi.tags = subjects
+        mi.tags = [tag.strip() for tag in subjects[0].split(',')]
         print "tags are "+str(mi.tags)
     if desc:
         desc = desc[0]
@@ -410,7 +426,7 @@ def main(args=sys.argv):
             #(None, '9780061747649', 'Mental_Floss Presents: Condensed Knowledge', ['Will Pearson', 'Mangesh Hattikudur']),
             #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
             #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
-            (None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
+            #(None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
             (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
             ]:
         cpath = os.path.join(tdir, title+'.jpg')
@@ -424,9 +440,9 @@ def main(args=sys.argv):
             print "curl is "+curl
             #open(cpath, 'wb').write(br.open_novisit(curl).read())
             #print 'Cover for', title, 'saved to', cpath
-
+        st = time.time()
         print get_social_metadata(title, author, isbn, ovrdrv_id)
-        #print '\n\n', time.time() - st, '\n\n'
+        print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
 
     return 0
 

From c4b5c8c91665d108cceadcd648c36d1e2888c4ef Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Thu, 7 Apr 2011 13:31:41 +0800
Subject: [PATCH 07/30] ...

---
 src/calibre/ebooks/metadata/overdrive.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
index 289d6bea0e..38d6d730ff 100644
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ b/src/calibre/ebooks/metadata/overdrive.py
@@ -153,6 +153,8 @@ def overdrive_search(br, q, title, author):
             elif int(m.group('totalrecords')) >= 1:
                 xref_q = ''
                 q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
+            elif int(m.group('totalrecords')) == 0:
+                return ''
         
     print "\n\nsorting results"
     return sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
@@ -185,16 +187,23 @@ def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_
                 else:
                     close_title_match = False
                     close_author_match = False
+                    print "format id is "+str(formatid)
                     for token in title_tokens:
+                        print "attempting to find "+str(token)+" title token"
                         if od_title.lower().find(token.lower()) != -1:
+                            print "matched token"
                             close_title_match = True
                         else:
+                            print "token didn't match"
                             close_title_match = False
                             break
                     for token in author_tokens:
+                        print "attempting to find "+str(token)+" author token"
                         if creators[0].lower().find(token.lower()) != -1:
+                            print "matched token"
                             close_author_match = True
                         else:
+                            print "token didn't match"
                             close_author_match = False
                             break
                     if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:

From 361e86c6ff04cf0d6a3cb07226309e99df373128 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Thu, 7 Apr 2011 23:04:32 +0800
Subject: [PATCH 08/30] ...

---
 src/calibre/customize/builtins.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 475cb36687..5e50f81173 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -10,6 +10,7 @@ from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.utils.config import test_eight_code
 
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -166,6 +167,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
     description = _('Extract cover from comic files')
 
     def get_metadata(self, stream, ftype):
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):  # sniff only when we can rewind
+            pos = stream.tell()
+            id_ = stream.read(3)  # leading magic bytes of the archive
+            stream.seek(pos)  # restore the position for the extractor below
+            if id_ == b'Rar':
+                ftype = 'cbr'
+            elif id_.startswith(b'PK'):  # was `id.startswith`, i.e. the builtin id()
+                ftype = 'cbz'
         if ftype == 'cbr':
             from calibre.libunrar import extract_first_alphabetically as extract_first
             extract_first

From ddb3d935d4c311382615dd646eae1f97e512c973 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Fri, 8 Apr 2011 08:37:38 +0800
Subject: [PATCH 09/30] ...

---
 src/calibre/customize/builtins.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 5e50f81173..8dbc72f8ac 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -637,7 +637,7 @@ else:
     from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
             AmazonCovers, DoubanCovers, OverdriveCovers
 
-    plugins += [GoogleBooks, ISBNDB, Amazon,
+    plugins += [GoogleBooks, ISBNDB, Amazon, Overdrive,
         OpenLibraryCovers, AmazonCovers, DoubanCovers, OverdriveCovers,
         NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
 

From 330d12c5eb8f41295990945d7a74ff1524825ba1 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Wed, 13 Apr 2011 23:24:34 +0800
Subject: [PATCH 10/30] ...

---
 src/calibre/ebooks/mobi/mobiml.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 40ad5e9e78..3feef7b6f5 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -464,9 +464,10 @@ class MobiMLizer(object):
         valign = style['vertical-align']
         not_baseline = valign in ('super', 'sub', 'text-top',
                 'text-bottom') or (
-                isinstance(valign, (float, int)) and abs(valign) != 0)
+                isinstance(valign, (float, int)) and abs(valign) != 0) or (
+                tag in ('sup', 'sub'))
         issup = valign in ('super', 'text-top') or (
-            isinstance(valign, (float, int)) and valign > 0)
+            isinstance(valign, (float, int)) and valign > 0) or tag == 'sup'
         vtag = 'sup' if issup else 'sub'
         if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
             nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)

From ecf21962d5fac590f9a8103fda8608e6cade3843 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Thu, 14 Apr 2011 18:34:49 +0800
Subject: [PATCH 11/30] ...

---
 src/calibre/ebooks/mobi/mobiml.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 8c7b740cdb..1e626cf916 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -464,10 +464,9 @@ class MobiMLizer(object):
         valign = style['vertical-align']
         not_baseline = valign in ('super', 'sub', 'text-top',
                 'text-bottom') or (
-                isinstance(valign, (float, int)) and abs(valign) != 0) or (
-                tag in ('sup', 'sub'))
+                isinstance(valign, (float, int)) and abs(valign) != 0)
         issup = valign in ('super', 'text-top') or (
-            isinstance(valign, (float, int)) and valign > 0) or tag == 'sup'
+            isinstance(valign, (float, int)) and valign > 0)
         vtag = 'sup' if issup else 'sub'
         if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
             nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)

From 04b543e854a409b73a7da8555815a10b5669e3d7 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Sun, 17 Apr 2011 22:42:57 +0800
Subject: [PATCH 12/30] start porting overdrive to 8

---
 src/calibre/customize/builtins.py           | 4 +++-
 src/calibre/ebooks/conversion/preprocess.py | 2 +-
 src/calibre/ebooks/metadata/sources/base.py | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 8956780e2c..6131c03f9c 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -626,8 +626,9 @@ if test_eight_code:
     from calibre.ebooks.metadata.sources.amazon import Amazon
     from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
     from calibre.ebooks.metadata.sources.isbndb import ISBNDB
+    from calibre.ebooks.metadata.sources.overdrive import OverDrive
 
-    plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB]
+    plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]
 
 # }}}
 else:
@@ -1097,6 +1098,7 @@ if test_eight_code:
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
+from calibre.ebooks.metadata.sources.overdrive import OverDrive
 
 plugins += [GoogleBooks, Amazon, OpenLibrary]
 
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index a1d5fa94d8..8822a39b87 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -399,7 +399,7 @@ class HTMLPreProcessor(object):
                   (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'),
 
                   # If pdf printed from a browser then the header/footer has a reliable pattern
-                  (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
+                  (re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
 
                   # Center separator lines
                   (re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 56f82641ab..bfc3e498eb 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -293,7 +293,7 @@ class Source(Plugin):
     def get_title_tokens(self, title, strip_joiners=True):
         '''
         Take a title and return a list of tokens useful for an AND search query.
-        Excludes connectives and punctuation.
+        Excludes connectives(optionally) and punctuation.
         '''
         if title:
             # strip sub-titles

From 4ea961ba6298a905bc50136e8054117d77a18575 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 18 Apr 2011 08:34:22 +0100
Subject: [PATCH 13/30] From Greg

---
 src/calibre/customize/builtins.py             |  5 +-
 src/calibre/devices/apple/driver.py           | 51 +++++++------
 .../devices/content_server/__init__.py        | 10 +++
 src/calibre/devices/content_server/driver.py  | 74 +++++++++++++++++++
 src/calibre/gui2/actions/catalog.py           |  2 +-
 src/calibre/gui2/device.py                    |  2 +-
 src/calibre/gui2/dialogs/tweak_epub.py        | 11 ++-
 src/calibre/library/server/content.py         | 27 ++++++-
 8 files changed, 154 insertions(+), 28 deletions(-)
 create mode 100644 src/calibre/devices/content_server/__init__.py
 create mode 100644 src/calibre/devices/content_server/driver.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index d3b0b8409d..458bfec3fd 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -582,6 +582,7 @@ from calibre.ebooks.snb.output import SNBOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 
 from calibre.devices.apple.driver import ITUNES
+from calibre.devices.content_server.driver import CONTENT_SERVER_FOR_CONFIG
 from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
 from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK, ORIZON
@@ -753,7 +754,9 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ITUNES,
-]
+    CONTENT_SERVER_FOR_CONFIG
+    ]
+
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index 2cc478603a..d7811f0a22 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -201,8 +201,9 @@ class ITUNES(DriverBase):
     #  0x1294   iPhone 3GS
     #  0x1297   iPhone 4
     #  0x129a   iPad
+    #  0x12a2   iPad2
     VENDOR_ID = [0x05ac]
-    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a]
+    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
     BCD = [0x01]
 
     # Plugboard ID
@@ -421,7 +422,7 @@ class ITUNES(DriverBase):
 
                         cached_books[this_book.path] = {
                          'title':book.name(),
-                         'author':[book.artist()],
+                         'author':book.artist().split(' & '),
                          'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
                          'dev_book':book,
                          'uuid': book.composer()
@@ -459,7 +460,7 @@ class ITUNES(DriverBase):
 
                             cached_books[this_book.path] = {
                              'title':book.Name,
-                             'author':book.Artist,
+                             'author':book.Artist.split(' & '),  # property access, like Name/Composer here
                              'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
                              'uuid': book.Composer,
                              'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
@@ -1021,7 +1022,9 @@ class ITUNES(DriverBase):
         if isosx:
             for (i,file) in enumerate(files):
                 format = file.rpartition('.')[2].lower()
-                path = self.path_template % (metadata[i].title, metadata[i].author[0],format)
+                path = self.path_template % (metadata[i].title,
+                                             authors_to_string(metadata[i].authors),
+                                             format)
                 self._remove_existing_copy(path, metadata[i])
                 fpath = self._get_fpath(file, metadata[i], format, update_md=True)
                 db_added, lb_added = self._add_new_copy(fpath, metadata[i])
@@ -1034,9 +1037,11 @@ class ITUNES(DriverBase):
                 if DEBUG:
                     self.log.info("ITUNES.upload_books()")
                     self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
-                                  ( metadata[i].title, metadata[i].author, metadata[i].uuid))
+                                  (metadata[i].title,
+                                   authors_to_string(metadata[i].authors),
+                                   metadata[i].uuid))
                 self.cached_books[this_book.path] = {
-                   'author': metadata[i].author,
+                   'author': authors_to_string(metadata[i].authors),
                  'dev_book': db_added,
                    'format': format,
                  'lib_book': lb_added,
@@ -1055,7 +1060,9 @@ class ITUNES(DriverBase):
 
                 for (i,file) in enumerate(files):
                     format = file.rpartition('.')[2].lower()
-                    path = self.path_template % (metadata[i].title, metadata[i].author[0],format)
+                    path = self.path_template % (metadata[i].title,
+                                                 authors_to_string(metadata[i].authors),
+                                                 format)
                     self._remove_existing_copy(path, metadata[i])
                     fpath = self._get_fpath(file, metadata[i],format, update_md=True)
                     db_added, lb_added = self._add_new_copy(fpath, metadata[i])
@@ -1075,9 +1082,11 @@ class ITUNES(DriverBase):
                     if DEBUG:
                         self.log.info("ITUNES.upload_books()")
                         self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
-                                      ( metadata[i].title, metadata[i].author, metadata[i].uuid))
+                                      (metadata[i].title,
+                                       authors_to_string(metadata[i].authors),
+                                       metadata[i].uuid))
                     self.cached_books[this_book.path] = {
-                       'author': metadata[i].author[0],
+                       'author': authors_to_string(metadata[i].authors),
                      'dev_book': db_added,
                        'format': format,
                      'lib_book': lb_added,
@@ -1190,7 +1199,7 @@ class ITUNES(DriverBase):
                         base_fn = base_fn.rpartition('.')[0]
                         db_added = self._find_device_book(
                             { 'title': base_fn if format == 'pdf' else metadata.title,
-                             'author': metadata.authors[0],
+                             'author': authors_to_string(metadata.authors),
                                'uuid': metadata.uuid,
                              'format': format})
                     return db_added
@@ -1255,7 +1264,7 @@ class ITUNES(DriverBase):
                 base_fn = base_fn.rpartition('.')[0]
                 added = self._find_library_book(
                     { 'title': base_fn if format == 'pdf' else metadata.title,
-                     'author': metadata.author[0],
+                     'author': authors_to_string(metadata.authors),
                        'uuid': metadata.uuid,
                      'format': format})
         return added
@@ -1314,7 +1323,7 @@ class ITUNES(DriverBase):
                         with open(metadata.cover,'r+b') as cd:
                             cover_data = cd.read()
                 except:
-                    self.problem_titles.append("'%s' by %s" % (metadata.title, metadata.author[0]))
+                    self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors)))
                     self.log.error("  error scaling '%s' for '%s'" % (metadata.cover,metadata.title))
 
                     import traceback
@@ -1389,7 +1398,7 @@ class ITUNES(DriverBase):
                 thumb_path = path.rpartition('.')[0] + '.jpg'
                 zfw.writestr(thumb_path, thumb)
             except:
-                self.problem_titles.append("'%s' by %s" % (metadata.title, metadata.author[0]))
+                self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors)))
                 self.log.error("   error converting '%s' to thumb for '%s'" % (metadata.cover,metadata.title))
             finally:
                 try:
@@ -1407,7 +1416,7 @@ class ITUNES(DriverBase):
         if DEBUG:
             self.log.info(" ITUNES._create_new_book()")
 
-        this_book = Book(metadata.title, authors_to_string(metadata.author))
+        this_book = Book(metadata.title, authors_to_string(metadata.authors))
         this_book.datetime = time.gmtime()
         this_book.db_id = None
         this_book.device_collections = []
@@ -2451,7 +2460,7 @@ class ITUNES(DriverBase):
             for book in self.cached_books:
                 if self.cached_books[book]['uuid'] == metadata.uuid   or \
                    (self.cached_books[book]['title'] == metadata.title and \
-                   self.cached_books[book]['author'] == metadata.authors[0]):
+                   self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
                     self.update_list.append(self.cached_books[book])
                     self._remove_from_device(self.cached_books[book])
                     if DEBUG:
@@ -2470,7 +2479,7 @@ class ITUNES(DriverBase):
             for book in self.cached_books:
                 if self.cached_books[book]['uuid'] == metadata.uuid   or \
                    (self.cached_books[book]['title'] == metadata.title and \
-                    self.cached_books[book]['author'] == metadata.authors[0]):
+                    self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
                     self.update_list.append(self.cached_books[book])
                     self._remove_from_iTunes(self.cached_books[book])
                     if DEBUG:
@@ -2939,13 +2948,13 @@ class ITUNES(DriverBase):
     def _xform_metadata_via_plugboard(self, book, format):
         ''' Transform book metadata from plugboard templates '''
         if DEBUG:
-            self.log.info("  ITUNES._xform_metadata_via_plugboard()")
+            self.log.info(" ITUNES._xform_metadata_via_plugboard()")
 
         if self.plugboard_func:
             pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
             newmi = book.deepcopy_metadata()
             newmi.template_to_attribute(book, pb)
-            if DEBUG:
+            if pb is not None and DEBUG:
                 self.log.info(" transforming %s using %s:" % (format, pb))
                 self.log.info("       title: %s %s" % (book.title, ">>> %s" %
                                            newmi.title if book.title != newmi.title else ''))
@@ -3062,7 +3071,7 @@ class ITUNES_ASYNC(ITUNES):
 
                     cached_books[this_book.path] = {
                      'title':library_books[book].name(),
-                     'author':[library_books[book].artist()],
+                     'author':library_books[book].artist().split(' & '),
                      'lib_book':library_books[book],
                      'dev_book':None,
                      'uuid': library_books[book].composer(),
@@ -3102,7 +3111,7 @@ class ITUNES_ASYNC(ITUNES):
 
                         cached_books[this_book.path] = {
                          'title':library_books[book].Name,
-                         'author':library_books[book].Artist,
+                         'author':library_books[book].Artist.split(' & '),
                          'lib_book':library_books[book],
                          'uuid': library_books[book].Composer,
                          'format': format
@@ -3288,7 +3297,7 @@ class Book(Metadata):
     See ebooks.metadata.book.base
     '''
     def __init__(self,title,author):
-        Metadata.__init__(self, title, authors=[author])
+        Metadata.__init__(self, title, authors=author.split(' & '))
 
     @property
     def title_sorter(self):
diff --git a/src/calibre/devices/content_server/__init__.py b/src/calibre/devices/content_server/__init__.py
new file mode 100644
index 0000000000..3d1a86922e
--- /dev/null
+++ b/src/calibre/devices/content_server/__init__.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
diff --git a/src/calibre/devices/content_server/driver.py b/src/calibre/devices/content_server/driver.py
new file mode 100644
index 0000000000..84b14f8e62
--- /dev/null
+++ b/src/calibre/devices/content_server/driver.py
@@ -0,0 +1,74 @@
+'''
+Created on 17 Apr 2011
+
+@author: GRiker, modeled on charles's Folder Device
+
+'''
+
+from calibre.constants import DEBUG
+from calibre.devices.interface import DevicePlugin
+from calibre.devices.usbms.deviceconfig import DeviceConfig
+from calibre.devices.usbms.driver import USBMS, BookList
+
+class DriverBase(DeviceConfig, DevicePlugin):
+    # Reduce to just the formats eligible for plugboard xforms
+    # These formats are shown in the customization dialog
+    FORMATS = ['epub', 'mobi']
+    USER_CAN_ADD_NEW_FORMATS = False
+
+    # Hide the standard customization widgets
+    SUPPORTS_SUB_DIRS = False
+    MUST_READ_METADATA = True
+    SUPPORTS_USE_AUTHOR_SORT = False
+
+
+# This class is added to the standard device plugin chain, so that it can
+# be configured. It has invalid vendor_id etc, so it will never match a
+# device. The 'real' CONTENT_SERVER will use the config from it.
+class CONTENT_SERVER_FOR_CONFIG(USBMS):
+    name           = 'Content Server Interface'
+    gui_name       = 'Content Server'
+    description    = _('Enables metadata plugboards to be used with Content Server.')
+    author         = 'GRiker'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    VENDOR_ID   = [0xffff]
+    PRODUCT_ID  = [0xffff]
+    BCD         = [0xffff]
+    DEVICE_PLUGBOARD_NAME = 'CONTENT_SERVER'
+
+    def config_widget(cls):
+        '''
+        Configure a minimal QWidget
+        Better to simply disable the config_widget altogether
+        '''
+        cw = DriverBase.config_widget()
+        # Turn off the Save template
+        cw.opt_save_template.setVisible(False)
+        cw.label.setVisible(False)
+        # Hide the up/down arrows
+        cw.column_up.setVisible(False)
+        cw.column_down.setVisible(False)
+        # Retitle
+        cw.groupBox.setTitle(_("Enable metadata plugboards for the following formats:"))
+        return cw
+
+class CONTENT_SERVER(USBMS):
+
+    FORMATS     = CONTENT_SERVER_FOR_CONFIG.FORMATS
+    DEVICE_PLUGBOARD_NAME = 'CONTENT_SERVER'
+
+    def __init__(self):
+        if DEBUG:
+            print("CONTENT_SERVER.init()")
+        pass
+
+    def set_plugboards(self, plugboards, pb_func):
+        # This method is called with the plugboard that matches the format
+        # declared in use_plugboard_ext and a device name of CONTENT_SERVER
+        if DEBUG:
+            print("CONTENT_SERVER.set_plugboards()")
+            print('  using plugboard %s' % plugboards)
+        self.plugboards = plugboards
+        self.plugboard_func = pb_func
+
diff --git a/src/calibre/gui2/actions/catalog.py b/src/calibre/gui2/actions/catalog.py
index fad6e59294..093985d041 100644
--- a/src/calibre/gui2/actions/catalog.py
+++ b/src/calibre/gui2/actions/catalog.py
@@ -17,7 +17,7 @@ from calibre.gui2.actions import InterfaceAction
 class GenerateCatalogAction(InterfaceAction):
 
     name = 'Generate Catalog'
-    action_spec = (_('Create a catalog of the books in your calibre library'), None, None, None)
+    action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None)
     dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
 
     def generate_catalog(self):
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 4d4f66eab1..8f21c17eaf 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -892,7 +892,7 @@ class DeviceMixin(object): # {{{
                 sub_dest_parts.append('')
             to = sub_dest_parts[0]
             fmts = sub_dest_parts[1]
-            subject = ';'.join(sub_dest_parts[2:]) 
+            subject = ';'.join(sub_dest_parts[2:])
             fmts = [x.strip().lower() for x in fmts.split(',')]
             self.send_by_mail(to, fmts, delete, subject=subject)
 
diff --git a/src/calibre/gui2/dialogs/tweak_epub.py b/src/calibre/gui2/dialogs/tweak_epub.py
index db6e93fd7a..a42fb07e40 100755
--- a/src/calibre/gui2/dialogs/tweak_epub.py
+++ b/src/calibre/gui2/dialogs/tweak_epub.py
@@ -12,6 +12,7 @@ from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
 
 from PyQt4.Qt import QDialog
 
+from calibre.constants import isosx, iswindows
 from calibre.gui2 import open_local_file
 from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
 from calibre.libunzip import extract as zipextract
@@ -42,11 +43,19 @@ class TweakEpub(QDialog, Ui_Dialog):
         self.move(parent_loc.x(),parent_loc.y())
 
     def cleanup(self):
+        if isosx:
+            try:
+                import appscript
+                self.finder = appscript.app('Finder')
+                self.finder.Finder_windows[os.path.basename(self._exploded)].close()
+            except:
+                # appscript fails to load on 10.4
+                pass
+
         # Delete directory containing exploded ePub
         if self._exploded is not None:
             shutil.rmtree(self._exploded, ignore_errors=True)
 
-
     def display_exploded(self):
         '''
         Generic subprocess launch of native file browser
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 0c3edd1627..faa0a61baf 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -183,16 +183,37 @@ class ContentServer(object):
         if fmt is None:
             raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
         if format == 'EPUB':
+            # Get the original metadata
+            mi = self.db.get_metadata(id, index_is_id=True)
+
+            # Instantiate the CONTENT_SERVER driver
+            from calibre.devices.content_server.driver import CONTENT_SERVER
+            cs = CONTENT_SERVER()
+
+            # Get any EPUB plugboards for the content server
+            from calibre.gui2.device import find_plugboard, device_name_for_plugboards
+            plugboards = self.db.prefs.get('plugboards', {})
+
+            # Transform the metadata via the plugboard
+            if hasattr(cs, 'set_plugboards') and callable(cs.set_plugboards):
+                cs.set_plugboards(plugboards, find_plugboard)
+                cpb = find_plugboard(device_name_for_plugboards(cs), format.lower(), plugboards)
+                if cpb:
+                    newmi = mi.deepcopy_metadata()
+                    newmi.template_to_attribute(mi, cpb)
+                else:
+                    newmi = mi
+
+            # Write the updated file
             from tempfile import TemporaryFile
             from calibre.ebooks.metadata.meta import set_metadata
             raw = fmt.read()
             fmt = TemporaryFile()
             fmt.write(raw)
             fmt.seek(0)
-            set_metadata(fmt, self.db.get_metadata(id, index_is_id=True,
-                get_cover=True),
-                    'epub')
+            set_metadata(fmt, newmi, 'epub')
             fmt.seek(0)
+
         mt = guess_type('dummy.'+format.lower())[0]
         if mt is None:
             mt = 'application/octet-stream'

From 91c5356ac5d14ef807cce610431ec44aa6ab0ff0 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 18 Apr 2011 09:26:34 +0100
Subject: [PATCH 14/30] Suggested content server plugboard implementation for
 Greg

---
 src/calibre/customize/builtins.py             |  2 -
 .../devices/content_server/__init__.py        | 10 ---
 src/calibre/devices/content_server/driver.py  | 74 -------------------
 src/calibre/gui2/device.py                    | 20 +----
 src/calibre/gui2/preferences/plugboard.py     |  4 +-
 src/calibre/library/save_to_disk.py           | 32 ++++----
 src/calibre/library/server/content.py         | 27 +++----
 7 files changed, 34 insertions(+), 135 deletions(-)
 delete mode 100644 src/calibre/devices/content_server/__init__.py
 delete mode 100644 src/calibre/devices/content_server/driver.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 458bfec3fd..8f50481f84 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -582,7 +582,6 @@ from calibre.ebooks.snb.output import SNBOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 
 from calibre.devices.apple.driver import ITUNES
-from calibre.devices.content_server.driver import CONTENT_SERVER_FOR_CONFIG
 from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
 from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK, ORIZON
@@ -754,7 +753,6 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ITUNES,
-    CONTENT_SERVER_FOR_CONFIG
     ]
 
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/devices/content_server/__init__.py b/src/calibre/devices/content_server/__init__.py
deleted file mode 100644
index 3d1a86922e..0000000000
--- a/src/calibre/devices/content_server/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import with_statement
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
-
diff --git a/src/calibre/devices/content_server/driver.py b/src/calibre/devices/content_server/driver.py
deleted file mode 100644
index 84b14f8e62..0000000000
--- a/src/calibre/devices/content_server/driver.py
+++ /dev/null
@@ -1,74 +0,0 @@
-'''
-Created on 17 Apr 2011
-
-@author: GRiker, modeled on charles's Folder Device
-
-'''
-
-from calibre.constants import DEBUG
-from calibre.devices.interface import DevicePlugin
-from calibre.devices.usbms.deviceconfig import DeviceConfig
-from calibre.devices.usbms.driver import USBMS, BookList
-
-class DriverBase(DeviceConfig, DevicePlugin):
-    # Reduce to just the formats eligible for plugboard xforms
-    # These formats are shown in the customization dialog
-    FORMATS = ['epub', 'mobi']
-    USER_CAN_ADD_NEW_FORMATS = False
-
-    # Hide the standard customization widgets
-    SUPPORTS_SUB_DIRS = False
-    MUST_READ_METADATA = True
-    SUPPORTS_USE_AUTHOR_SORT = False
-
-
-# This class is added to the standard device plugin chain, so that it can
-# be configured. It has invalid vendor_id etc, so it will never match a
-# device. The 'real' CONTENT_SERVER will use the config from it.
-class CONTENT_SERVER_FOR_CONFIG(USBMS):
-    name           = 'Content Server Interface'
-    gui_name       = 'Content Server'
-    description    = _('Enables metadata plugboards to be used with Content Server.')
-    author         = 'GRiker'
-    supported_platforms = ['windows', 'osx', 'linux']
-
-    VENDOR_ID   = [0xffff]
-    PRODUCT_ID  = [0xffff]
-    BCD         = [0xffff]
-    DEVICE_PLUGBOARD_NAME = 'CONTENT_SERVER'
-
-    def config_widget(cls):
-        '''
-        Configure a minimal QWidget
-        Better to simply disable the config_widget altogether
-        '''
-        cw = DriverBase.config_widget()
-        # Turn off the Save template
-        cw.opt_save_template.setVisible(False)
-        cw.label.setVisible(False)
-        # Hide the up/down arrows
-        cw.column_up.setVisible(False)
-        cw.column_down.setVisible(False)
-        # Retitle
-        cw.groupBox.setTitle(_("Enable metadata plugboards for the following formats:"))
-        return cw
-
-class CONTENT_SERVER(USBMS):
-
-    FORMATS     = CONTENT_SERVER_FOR_CONFIG.FORMATS
-    DEVICE_PLUGBOARD_NAME = 'CONTENT_SERVER'
-
-    def __init__(self):
-        if DEBUG:
-            print("CONTENT_SERVER.init()")
-        pass
-
-    def set_plugboards(self, plugboards, pb_func):
-        # This method is called with the plugboard that matches the format
-        # declared in use_plugboard_ext and a device name of CONTENT_SERVER
-        if DEBUG:
-            print("CONTENT_SERVER.set_plugboards()")
-            print('  using plugboard %s' % plugboards)
-        self.plugboards = plugboards
-        self.plugboard_func = pb_func
-
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 8f21c17eaf..2e252047af 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -29,8 +29,7 @@ from calibre.ebooks.metadata.meta import set_metadata
 from calibre.constants import DEBUG
 from calibre.utils.config import prefs, tweaks
 from calibre.utils.magick.draw import thumbnail
-from calibre.library.save_to_disk import plugboard_any_device_value, \
-                                         plugboard_any_format_value
+from calibre.library.save_to_disk import find_plugboard
 # }}}
 
 class DeviceJob(BaseJob): # {{{
@@ -93,23 +92,6 @@ class DeviceJob(BaseJob): # {{{
 
     # }}}
 
-def find_plugboard(device_name, format, plugboards):
-    cpb = None
-    if format in plugboards:
-        cpb = plugboards[format]
-    elif plugboard_any_format_value in plugboards:
-        cpb = plugboards[plugboard_any_format_value]
-    if cpb is not None:
-        if device_name in cpb:
-            cpb = cpb[device_name]
-        elif plugboard_any_device_value in cpb:
-            cpb = cpb[plugboard_any_device_value]
-        else:
-            cpb = None
-    if DEBUG:
-        prints('Device using plugboard', format, device_name, cpb)
-    return cpb
-
 def device_name_for_plugboards(device_class):
     if hasattr(device_class, 'DEVICE_PLUGBOARD_NAME'):
         return device_class.DEVICE_PLUGBOARD_NAME
diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py
index 8f2b084d76..c5db7074dc 100644
--- a/src/calibre/gui2/preferences/plugboard.py
+++ b/src/calibre/gui2/preferences/plugboard.py
@@ -15,6 +15,7 @@ from calibre.gui2.preferences.plugboard_ui import Ui_Form
 from calibre.customize.ui import metadata_writers, device_plugins
 from calibre.library.save_to_disk import plugboard_any_format_value, \
                         plugboard_any_device_value, plugboard_save_to_disk_value
+from calibre.library.server.content import plugboard_content_server_value
 from calibre.utils.formatter import validation_formatter
 
 
@@ -74,7 +75,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                 self.devices.append(n)
         self.devices.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
         self.devices.insert(1, plugboard_save_to_disk_value)
-        self.devices.insert(2, plugboard_any_device_value)
+        self.devices.insert(1, plugboard_content_server_value)
+        self.devices.insert(1, plugboard_any_device_value)
         self.new_device.addItems(self.devices)
 
         self.formats = ['']
diff --git a/src/calibre/library/save_to_disk.py b/src/calibre/library/save_to_disk.py
index 96c42e6e0e..3c57af40a8 100644
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@@ -51,6 +51,23 @@ for x in FORMAT_ARG_DESCS:
     FORMAT_ARGS[x] = ''
 
 
+def find_plugboard(device_name, format, plugboards):
+    cpb = None
+    if format in plugboards:
+        cpb = plugboards[format]
+    elif plugboard_any_format_value in plugboards:
+        cpb = plugboards[plugboard_any_format_value]
+    if cpb is not None:
+        if device_name in cpb:
+            cpb = cpb[device_name]
+        elif plugboard_any_device_value in cpb:
+            cpb = cpb[plugboard_any_device_value]
+        else:
+            cpb = None
+    if DEBUG:
+        prints('Device using plugboard', format, device_name, cpb)
+    return cpb
+
 def config(defaults=None):
     if defaults is None:
         c = Config('save_to_disk', _('Options to control saving to disk'))
@@ -279,20 +296,7 @@ def do_save_book_to_disk(id_, mi, cover, plugboards,
     written = False
     for fmt in formats:
         global plugboard_save_to_disk_value, plugboard_any_format_value
-        dev_name = plugboard_save_to_disk_value
-        cpb = None
-        if fmt in plugboards:
-            cpb = plugboards[fmt]
-            if dev_name in cpb:
-                cpb = cpb[dev_name]
-            else:
-                cpb = None
-        if cpb is None and plugboard_any_format_value in plugboards:
-            cpb = plugboards[plugboard_any_format_value]
-            if dev_name in cpb:
-                cpb = cpb[dev_name]
-            else:
-                cpb = None
+        cpb = find_plugboard(plugboard_save_to_disk_value, fmt, plugboards)
         # Leave this here for a while, in case problems arise.
         if cpb is not None:
             prints('Save-to-disk using plugboard:', fmt, cpb)
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index faa0a61baf..8d9e71c528 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -12,9 +12,13 @@ import cherrypy
 from calibre import fit_image, guess_type
 from calibre.utils.date import fromtimestamp
 from calibre.library.caches import SortKeyGenerator
+from calibre.library.save_to_disk import find_plugboard
+
 from calibre.utils.magick.draw import save_cover_data_to, Image, \
         thumbnail as generate_thumbnail
 
+plugboard_content_server_value = 'content_server'
+
 class CSSortKeyGenerator(SortKeyGenerator):
 
     def __init__(self, fields, fm, db_prefs):
@@ -186,23 +190,16 @@ class ContentServer(object):
             # Get the original metadata
             mi = self.db.get_metadata(id, index_is_id=True)
 
-            # Instantiate the CONTENT_SERVER driver
-            from calibre.devices.content_server.driver import CONTENT_SERVER
-            cs = CONTENT_SERVER()
-
             # Get any EPUB plugboards for the content server
-            from calibre.gui2.device import find_plugboard, device_name_for_plugboards
             plugboards = self.db.prefs.get('plugboards', {})
-
-            # Transform the metadata via the plugboard
-            if hasattr(cs, 'set_plugboards') and callable(cs.set_plugboards):
-                cs.set_plugboards(plugboards, find_plugboard)
-                cpb = find_plugboard(device_name_for_plugboards(cs), format.lower(), plugboards)
-                if cpb:
-                    newmi = mi.deepcopy_metadata()
-                    newmi.template_to_attribute(mi, cpb)
-                else:
-                    newmi = mi
+            cpb = find_plugboard(plugboard_content_server_value,
+                                 'epub', plugboards)
+            if cpb:
+                # Transform the metadata via the plugboard
+                newmi = mi.deepcopy_metadata()
+                newmi.template_to_attribute(mi, cpb)
+            else:
+                newmi = mi
 
             # Write the updated file
             from tempfile import TemporaryFile

From 3709dcbc621e152472184bacbb056e16e1aef1fe Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 18 Apr 2011 10:43:20 +0100
Subject: [PATCH 15/30] Add check for valid formats

---
 src/calibre/gui2/preferences/plugboard.py | 16 +++++++++++++++-
 src/calibre/library/server/content.py     |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py
index c5db7074dc..7036ddf8f3 100644
--- a/src/calibre/gui2/preferences/plugboard.py
+++ b/src/calibre/gui2/preferences/plugboard.py
@@ -15,7 +15,8 @@ from calibre.gui2.preferences.plugboard_ui import Ui_Form
 from calibre.customize.ui import metadata_writers, device_plugins
 from calibre.library.save_to_disk import plugboard_any_format_value, \
                         plugboard_any_device_value, plugboard_save_to_disk_value
-from calibre.library.server.content import plugboard_content_server_value
+from calibre.library.server.content import plugboard_content_server_value, \
+                                        plugboard_content_server_formats
 from calibre.utils.formatter import validation_formatter
 
 
@@ -69,13 +70,17 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
             self.device_label.setText(_('Device currently connected: None'))
 
         self.devices = ['', 'APPLE', 'FOLDER_DEVICE']
+        self.device_to_formats_map = {}
         for device in device_plugins():
             n = device_name_for_plugboards(device)
+            self.device_to_formats_map[n] = device.FORMATS
             if n not in self.devices:
                 self.devices.append(n)
         self.devices.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
         self.devices.insert(1, plugboard_save_to_disk_value)
         self.devices.insert(1, plugboard_content_server_value)
+        self.device_to_formats_map[plugboard_content_server_value] = \
+                        plugboard_content_server_formats
         self.devices.insert(1, plugboard_any_device_value)
         self.new_device.addItems(self.devices)
 
@@ -232,6 +237,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                      show=True)
             self.new_device.setCurrentIndex(0)
             return
+        if self.current_device in self.device_to_formats_map:
+            allowable_formats = self.device_to_formats_map[self.current_device]
+            if self.current_format not in allowable_formats:
+                error_dialog(self, '',
+                     _('The {0} device does not support the {1} format.').
+                                format(self.current_device, self.current_format),
+                     show=True)
+                self.new_device.setCurrentIndex(0)
+                return
         self.set_fields()
 
     def new_format_changed(self, txt):
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 8d9e71c528..08de4faecd 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -18,6 +18,7 @@ from calibre.utils.magick.draw import save_cover_data_to, Image, \
         thumbnail as generate_thumbnail
 
 plugboard_content_server_value = 'content_server'
+plugboard_content_server_formats = ['epub']
 
 class CSSortKeyGenerator(SortKeyGenerator):
 

From 09da88b6d18d0c4bf09e126f6af7195069b15863 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 18:06:10 +0800
Subject: [PATCH 16/30] port overdrive plugin to 8.x framework, remove from 7.x
 framework

---
 src/calibre/customize/builtins.py             |   8 +-
 src/calibre/ebooks/metadata/covers.py         |  27 -
 src/calibre/ebooks/metadata/fetch.py          |  21 -
 src/calibre/ebooks/metadata/overdrive.py      | 459 ----------------
 src/calibre/ebooks/metadata/sources/base.py   |   4 +-
 .../ebooks/metadata/sources/overdrive.py      | 510 ++++++++++++++++++
 6 files changed, 516 insertions(+), 513 deletions(-)
 delete mode 100644 src/calibre/ebooks/metadata/overdrive.py
 create mode 100755 src/calibre/ebooks/metadata/sources/overdrive.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index f4a8c6b6bc..75c02c7e00 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -633,14 +633,14 @@ if test_eight_code:
 # }}}
 else:
     from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-        KentDistrictLibrary, Overdrive
+        KentDistrictLibrary
     from calibre.ebooks.metadata.douban import DoubanBooks
     from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
     from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-            AmazonCovers, DoubanCovers, OverdriveCovers
+            AmazonCovers, DoubanCovers
 
-    plugins += [GoogleBooks, ISBNDB, Amazon, Overdrive,
-        OpenLibraryCovers, AmazonCovers, DoubanCovers, OverdriveCovers,
+    plugins += [GoogleBooks, ISBNDB, Amazon,
+        OpenLibraryCovers, AmazonCovers, DoubanCovers,
         NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
 
 plugins += [
diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py
index f705317f59..10acff4e61 100644
--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@@ -151,33 +151,6 @@ class AmazonCovers(CoverDownload): # {{{
 
 # }}}
 
-class OverdriveCovers(CoverDownload): # {{{
-
-    name = 'overdrive.com covers'
-    description = _('Download covers from Overdrive')
-    author = 'Kovid Goyal'
-
-
-    def has_cover(self, mi, ans, timeout=5.):
-        if not mi.authors or not mi.title:
-            return False
-        return True
-
-    def get_covers(self, mi, result_queue, abort, timeout=5.):
-        if not mi.isbn:
-            return
-        from calibre.ebooks.metadata.overdrive import get_cover_url
-        br = browser()
-        try:
-            url = get_cover_url(mi.isbn, mi.title, mi.authors, br)
-            cover_data = br.open_novisit(url).read()
-            result_queue.put((True, cover_data, 'jpg', self.name))
-        except Exception, e:
-            result_queue.put((False, self.exception_to_string(e),
-                traceback.format_exc(), self.name))
-
-# }}}
-
 def check_for_cover(mi, timeout=5.): # {{{
     from calibre.customize.ui import cover_sources
     ans = Event()
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index fb01c5dd71..e1fac50d16 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -250,27 +250,6 @@ class Amazon(MetadataSource): # {{{
 
     # }}}
 
-class Overdrive(MetadataSource): # {{{
-
-    name = 'Overdrive'
-    metadata_type = 'social'
-    description = _('Downloads  metadata from the Overdrive library network')
-
-    has_html_comments = True
-
-    def fetch(self):
-        if not self.isbn:
-            return
-        from calibre.ebooks.metadata.overdrive import get_social_metadata
-        try:
-            self.results = get_social_metadata(self.title, self.book_author, self.isbn)
-            
-        except Exception, e:
-            self.exception = e
-            self.tb = traceback.format_exc()
-
-    # }}}
-
 class KentDistrictLibrary(MetadataSource): # {{{
 
     name = 'Kent District Library'
diff --git a/src/calibre/ebooks/metadata/overdrive.py b/src/calibre/ebooks/metadata/overdrive.py
deleted file mode 100644
index 38d6d730ff..0000000000
--- a/src/calibre/ebooks/metadata/overdrive.py
+++ /dev/null
@@ -1,459 +0,0 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Fetch metadata using Overdrive Content Reserve
-'''
-import sys, re, random, urllib, mechanize, copy
-from threading import RLock
-
-from lxml import html, etree
-from lxml.html import soupparser
-
-from calibre import browser
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.library.comments import sanitize_comments_html
-
-ovrdrv_data_cache = {}
-cover_url_cache = {}
-cache_lock = RLock()
-base_url = 'http://search.overdrive.com/'
-
-
-def create_query(self, title=None, authors=None, identifiers={}):
-    q = ''
-    if title or authors:
-        def build_term(prefix, parts):
-            return ' '.join('in'+prefix + ':' + x for x in parts)
-        title_tokens = list(self.get_title_tokens(title, False))
-        if title_tokens:
-            q += build_term('title', title_tokens)
-        author_tokens = self.get_author_tokens(authors,
-                only_first_author=True)
-        if author_tokens:
-            q += ('+' if q else '') + build_term('author',
-                    author_tokens)
-
-    if isinstance(q, unicode):
-        q = q.encode('utf-8')
-    if not q:
-        return None
-    return BASE_URL+urlencode({
-        'q':q,
-        })
-
-
-def get_base_referer():
-    choices = [
-        'http://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
-        'http://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
-        'http://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
-        'http://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
-        'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
-    ]
-    return choices[random.randint(0, len(choices)-1)]
-
-def format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
-    fix_slashes = re.compile(r'\\/')
-    thumbimage = fix_slashes.sub('/', thumbimage)
-    worldcatlink = fix_slashes.sub('/', worldcatlink)
-    cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
-    social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
-    series_num = ''
-    if not series:
-        if subtitle:
-            title = od_title+': '+subtitle
-        else:
-            title = od_title
-    else:
-        title = od_title
-        m = re.search("([0-9]+$)", subtitle)
-        if m:
-            series_num = float(m.group(1))
-    return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
-
-def safe_query(br, query_url):
-    '''
-    The query must be initialized by loading an empty search results page
-    this page attempts to set a cookie that Mechanize doesn't like
-    copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
-    '''
-    goodcookies = br._ua_handlers['_cookies'].cookiejar
-    clean_cj = mechanize.CookieJar()
-    cookies_to_copy = []
-    for cookie in goodcookies:
-        copied_cookie = copy.deepcopy(cookie)
-        cookies_to_copy.append(copied_cookie)
-    for copied_cookie in cookies_to_copy:
-        clean_cj.set_cookie(copied_cookie)
-
-    br.open_novisit(query_url)
-    
-    br.set_cookiejar(clean_cj)
-
-
-def overdrive_search(br, q, title, author):
-    q_query = q+'default.aspx/SearchByKeyword'
-    q_init_search = q+'SearchResults.aspx'
-    # get first author as string - convert this to a proper cleanup function later
-    s = Source(None)
-    print "printing list with string:"
-    #print list(s.get_author_tokens(['J. R. R. Tolkien']))
-    print "printing list with author "+str(author)+":"
-    print list(s.get_author_tokens(author))
-    author_tokens = list(s.get_author_tokens(author))
-    print "there are "+str(len(author_tokens))+" author tokens"
-    for token in author_tokens:
-        print "cleaned up author token is: "+str(token)
-
-
-    title_tokens = list(s.get_title_tokens(title))
-    print "there are "+str(len(title_tokens))+" title tokens"
-    for token in title_tokens:
-        print "cleaned up title token is: "+str(token)
-
-    if len(title_tokens) >= len(author_tokens):
-        initial_q = ' '.join(title_tokens)
-        xref_q = '+'.join(author_tokens)
-    else:
-        initial_q = ' '.join(author_tokens)
-        xref_q = '+'.join(title_tokens)
-
-    print "initial query is "+str(initial_q)
-    print "cross reference query is "+str(xref_q)
-    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
-    query = '{"szKeyword":"'+initial_q+'"}'
-
-    # main query, requires specific Content Type header
-    req = mechanize.Request(q_query)
-    req.add_header('Content-Type', 'application/json; charset=utf-8')
-    br.open_novisit(req, query)
-
-    print "q_init_search is "+q_init_search
-    # initiate the search without messing up the cookiejar
-    safe_query(br, q_init_search)
-
-    # get the search results object
-    results = False
-    while results == False:
-        xreq = mechanize.Request(q_xref)
-        xreq.add_header('X-Requested-With', 'XMLHttpRequest')
-        xreq.add_header('Referer', q_init_search)
-        xreq.add_header('Accept', 'application/json, text/javascript, */*')
-        raw = br.open_novisit(xreq).read()
-        print "overdrive search result is:\n"+raw
-        for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
-            if int(m.group('displayrecords')) >= 1:
-                results = True
-            elif int(m.group('totalrecords')) >= 1:
-                xref_q = ''
-                q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
-            elif int(m.group('totalrecords')) == 0:
-                return ''
-        
-    print "\n\nsorting results"
-    return sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
-
-
-def sort_ovrdrv_results(raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
-    print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
-    close_matches = []
-    raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
-    results = eval(raw)
-    print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
-    #print results
-    # The search results are either from a keyword search or a multi-format list from a single ID,
-    # sort through the results for closest match/format
-    if results:
-        for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
-                thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
-                availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-            print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
-            if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
-                print "overdrive id is not None, searching based on format type priority"
-                return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
-            else:
-                creators = creators.split(', ')
-                print "split creators from results are: "+str(creators)
-                # if an exact match in a preferred format occurs
-                if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
-                    print "Got Exact Match!!!"
-                    return format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
-                else:
-                    close_title_match = False
-                    close_author_match = False
-                    print "format id is "+str(formatid)
-                    for token in title_tokens:
-                        print "attempting to find "+str(token)+" title token"
-                        if od_title.lower().find(token.lower()) != -1:
-                            print "matched token"
-                            close_title_match = True
-                        else:
-                            print "token didn't match"
-                            close_title_match = False
-                            break
-                    for token in author_tokens:
-                        print "attempting to find "+str(token)+" author token"
-                        if creators[0].lower().find(token.lower()) != -1:
-                            print "matched token"
-                            close_author_match = True
-                        else:
-                            print "token didn't match"
-                            close_author_match = False
-                            break
-                    if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
-                        if subtitle and series:
-                            close_matches.insert(0, format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
-                        else:
-                            close_matches.append(format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
-        if close_matches:
-            return close_matches[0]
-        else:
-            return ''
-    else:
-        return ''
-
-
-
-def overdrive_get_record(br, q, ovrdrv_id):
-    search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
-    results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
-
-    # get the base url to set the proper session cookie
-    br.open_novisit(q)
-
-    # initialize the search
-    safe_query(br, search_url)
-
-    # get the results
-    req = mechanize.Request(results_url)
-    req.add_header('X-Requested-With', 'XMLHttpRequest')
-    req.add_header('Referer', search_url)
-    req.add_header('Accept', 'application/json, text/javascript, */*')
-    raw = br.open_novisit(req)
-    raw = str(list(raw))
-    return sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
-
-
-def find_ovrdrv_data(br, title, author, isbn, ovrdrv_id=None):
-    print "in find_ovrdrv_data, title is "+str(title)+", author is "+str(author)+", overdrive id is "+str(ovrdrv_id)
-    q = base_url
-    if ovrdrv_id is None:
-       return overdrive_search(br, q, title, author)
-    else:
-       return overdrive_get_record(br, q, ovrdrv_id)
-
-
-
-def to_ovrdrv_data(br, title, author, isbn, ovrdrv_id=None):
-    print "starting to_ovrdrv_data"
-    with cache_lock:
-        ans = ovrdrv_data_cache.get(isbn, None)
-    if ans:
-        print "inside to_ovrdrv_data, cache lookup successful, ans is "+str(ans)
-        return ans
-    if ans is False:
-        print "inside to_ovrdrv_data, ans returned False"
-        return None
-    try:
-        print "trying to retrieve data, running find_ovrdrv_data"
-        ovrdrv_data = find_ovrdrv_data(br, title, author, isbn, ovrdrv_id)
-        print "ovrdrv_data is "+str(ovrdrv_data)
-    except:
-        import traceback
-        traceback.print_exc()
-        ovrdrv_data = None
-
-    with cache_lock:
-        ovrdrv_data_cache[isbn] = ovrdrv_data if ovrdrv_data else False
-    if ovrdrv_data:
-        from calibre.ebooks.metadata.xisbn import xisbn
-        for i in xisbn.get_associated_isbns(isbn):
-            with cache_lock:
-                ovrdrv_data_cache[i] = ovrdrv_data
-
-    return ovrdrv_data
-
-
-def get_social_metadata(title, authors, isbn, ovrdrv_id=None):
-    author = authors[0]
-    mi = Metadata(title, authors)
-    br = browser()
-    print "calling to_ovrdrv_data from inside get_social_metadata"
-    ovrdrv_data = to_ovrdrv_data(br, title, authors, isbn, ovrdrv_id)
-
-    #[cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
-
-    if len(ovrdrv_data[3]) > 1:
-        mi.series = ovrdrv_data[3]
-        if ovrdrv_data[4]:
-            mi.series_index = ovrdrv_data[4]
-    mi.publisher = ovrdrv_data[5]
-    mi.authors = ovrdrv_data[6]
-    if ovrdrv_id is None:
-        ovrdrv_id = ovrdrv_data[7]
-    mi.set_identifier('overdrive', ovrdrv_id)
-    mi.title = ovrdrv_data[8]
-    print "populated basic social metadata, getting detailed metadata"
-    if ovrdrv_data and get_metadata_detail(br, ovrdrv_data[1], mi, isbn):
-        return mi
-    print "failed to get detailed metadata, returning basic info"
-    return mi
-
-def get_cover_url(isbn, title, author, br, ovrdrv_id=None):
-    print "starting get_cover_url"
-    print "title is "+str(title)
-    print "author is "+str(author[0])
-    print "isbn is "+str(isbn)
-    print "ovrdrv_id is "+str(ovrdrv_id)
-
-    with cache_lock:
-        ans = cover_url_cache.get(isbn, None)
-        #ans = cover_url_cache.get(ovrdrv_id, None)
-    if ans:
-        print "cover url cache lookup returned positive, ans is "+str(ans)
-        return ans
-    if ans is False:
-        "cover url cache lookup returned false"
-        return None
-    print "in get_cover_url, calling to_ovrdrv_data function"
-    ovrdrv_data = to_ovrdrv_data(br, title, author, isbn, ovrdrv_id)
-    if ovrdrv_data:
-        ans = ovrdrv_data[0]
-        print "inside get_cover_url, got url from to_ovrdrv_data, ans is "+str(ans)
-        if ans:
-            print "writing cover url to url cache"
-            with cache_lock:
-                cover_url_cache[isbn] = ans
-                #cover_url_cache[ovrdrv_id] = ans
-            return ans
-            
-    with cache_lock:
-        print "marking cover url cache for this isbn false"
-        cover_url_cache[isbn] = False
-    return None
-
-def _get_cover_url(br, ovrdrv_data):
-    q = ovrdrv_data[1]
-    try:
-        raw = br.open_novisit(q).read()
-    except Exception, e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return None
-        raise
-    if '<title>404 - ' in raw:
-        return None
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-
-    imgs = root.xpath('//img[@id="prodImage" and @src]')
-    if imgs:
-        src = imgs[0].get('src')
-        parts = src.split('/')
-        if len(parts) > 3:
-            bn = parts[-1]
-            sparts = bn.split('_')
-            if len(sparts) > 2:
-                bn = sparts[0] + sparts[-1]
-                return ('/'.join(parts[:-1]))+'/'+bn
-    return None
-
-def get_metadata_detail(br, metadata_url, mi, isbn=None):
-    try:
-        raw = br.open_novisit(metadata_url).read()
-    except Exception, e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return False
-        raise   
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-
-    isbn = check_isbn(isbn)
-
-    pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
-    lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
-    subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
-    ebook_isbn = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
-    desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")
-
-    if pub_date:
-        from calibre.utils.date import parse_date
-        mi.pubdate = parse_date(pub_date[0].strip())
-    if lang:
-        mi.language = lang[0].strip()
-        print "languages is "+str(mi.language)
-    if ebook_isbn and isbn is None:
-        print "ebook isbn is "+str(ebook_isbn[0])
-        mi.set_identifier('isbn', ebook_isbn)
-    #elif isbn is not None:
-    #    mi.set_identifier('isbn', isbn)
-    if subjects:
-        mi.tags = [tag.strip() for tag in subjects[0].split(',')]
-        print "tags are "+str(mi.tags)
-    if desc:
-        desc = desc[0]
-        desc = html.tostring(desc, method='html', encoding=unicode).strip()
-        # remove all attributes from tags
-        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
-        # Remove comments
-        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = sanitize_comments_html(desc)
-
-    return True
-
-def main(args=sys.argv):
-    print "running through main tests"
-    import tempfile, os, time
-    tdir = tempfile.gettempdir()
-    br = browser()
-    for ovrdrv_id, isbn, title, author in [
-            #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
-            #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
-            #(None, '9780061952838', 'The Two Towers (The Lord of the Rings, Book II)', ['J. R. R. Tolkien']), # Series test, book 2
-            #(None, '9780618153985', 'The Fellowship of the Ring (The Lord of the Rings, Part 1)', ['J.R.R. Tolkien']),
-            #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id
-            #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
-            #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN
-            #(None, None, 'On the Road', ['Jack Kerouac']), # Nonetype ISBN
-            #(None, '9780345435279', 'A Caress of Twilight', ['Laurell K. Hamilton']),
-            #(None, '9780606087230', 'The Omnivore\'s Dilemma : A Natural History of Four Meals', ['Michael Pollan']), # Subtitle colon
-            #(None, '9780061747649', 'Mental_Floss Presents: Condensed Knowledge', ['Will Pearson', 'Mangesh Hattikudur']),
-            #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
-            #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
-            #(None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
-            (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
-            ]:
-        cpath = os.path.join(tdir, title+'.jpg')
-        print "cpath is "+cpath
-        st = time.time()
-        curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
-        print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
-        if curl is None:
-            print 'No cover found for', title
-        else:
-            print "curl is "+curl
-            #open(cpath, 'wb').write(br.open_novisit(curl).read())
-            #print 'Cover for', title, 'saved to', cpath
-        st = time.time()
-        print get_social_metadata(title, author, isbn, ovrdrv_id)
-        print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 5911a357ac..53fe9a4c2d 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -313,8 +313,8 @@ class Source(Plugin):
                 (r'(\d+),(\d+)', r'\1\2'),
                 # Remove hyphens only if they have whitespace before them
                 (r'(\s-)', ' '),
-                # Remove single quotes
-                (r"'", ''),
+                # Remove single quotes not followed by 's'
+                (r"'(?!s)", ''),
                 # Replace other special chars with a space
                 (r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
             ]]
diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
new file mode 100755
index 0000000000..6950711da4
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -0,0 +1,510 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Fetch metadata using Overdrive Content Reserve
+'''
+import sys, re, random, urllib, mechanize, copy
+from threading import RLock
+from Queue import Queue, Empty
+
+from lxml import html, etree
+from lxml.html import soupparser
+
+from calibre import browser
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.sources.base import Source
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html
+
+ovrdrv_data_cache = {}
+cover_url_cache = {}
+cache_lock = RLock()
+base_url = 'http://search.overdrive.com/'
+
+
+class OverDrive(Source):
+
+    name = 'Overdrive'
+    description = _('Downloads metadata from Overdrive\'s Content Reserve')
+
+    capabilities = frozenset(['identify', 'cover'])
+    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
+        'comments', 'publisher', 'identifier:isbn', 'series', 'series_num',
+        'language', 'identifier:overdrive'])
+    has_html_comments = True
+    supports_gzip_transfer_encoding = False
+    cached_cover_url_is_reliable = True
+
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
+            identifiers={}, timeout=30):
+        ovrdrv_id = identifiers.get('overdrive', None)
+        isbn = identifiers.get('isbn', None)
+
+        br = self.browser
+        print "in identify, calling to_ovrdrv_data"
+        ovrdrv_data = self.to_ovrdrv_data(br, title, authors, ovrdrv_id)
+        if ovrdrv_data:
+            title = ovrdrv_data[8]
+            authors = ovrdrv_data[6]
+            mi = Metadata(title, authors)
+            self.parse_search_results(ovrdrv_data, mi)
+            if ovrdrv_id is None:
+                ovrdrv_id = ovrdrv_data[7]
+            if isbn is not None:
+                self.cache_isbn_to_identifier(isbn, ovrdrv_id)
+    
+            self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
+    
+            result_queue.put(mi)
+
+        return None
+    # }}}
+
+
+    def get_book_url(self, identifiers): # {{{
+        ovrdrv_id = identifiers.get('overdrive', None)
+        if ovrdrv_id is not None:
+            ovrdrv_data = ovrdrv_data_cache.get(ovrdrv_id, None)
+            if ovrdrv_data:
+                return ovrdrv_data[1]
+            else:
+                br = browser()
+                ovrdrv_data = self.to_ovrdrv_data(br, None, None, ovrdrv_id)
+                return ovrdrv_data[1]
+    # }}}
+
+    def download_cover(self, log, result_queue, abort, # {{{
+            title=None, authors=None, identifiers={}, timeout=30):
+        cached_url = self.get_cached_cover_url(identifiers)
+        if cached_url is None:
+            log.info('No cached cover found, running identify')
+            rq = Queue()
+            print "inside download cover, calling identify"
+            self.identify(log, rq, abort, title=title, authors=authors,
+                    identifiers=identifiers)
+            if abort.is_set():
+                return
+            results = []
+            while True:
+                try:
+                    results.append(rq.get_nowait())
+                except Empty:
+                    break
+            results.sort(key=self.identify_results_keygen(
+                title=title, authors=authors, identifiers=identifiers))
+            for mi in results:
+                cached_url = self.get_cached_cover_url(mi.identifiers)
+                if cached_url is not None:
+                    break
+        if cached_url is None:
+            log.info('No cover found')
+            return
+
+        if abort.is_set():
+            return
+
+        ovrdrv_id = identifiers.get('overdrive', None)
+        br = self.browser
+        referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
+        print "downloading cover, referer is "+str(referer)
+        req = mechanize.Request(cached_url)
+        req.add_header('referer', referer)
+        log('Downloading cover from:', cached_url)
+        try:
+            cdata = br.open_novisit(req, timeout=timeout).read()
+            result_queue.put((self, cdata))
+        except:
+            log.exception('Failed to download cover from:', cached_url)
+    # }}}
+
+    def get_cached_cover_url(self, identifiers): # {{{
+        url = None
+        ovrdrv_id = identifiers.get('overdrive', None)
+        print "inside get_cached_cover_url, ovrdrv_id is "+str(ovrdrv_id)
+        if ovrdrv_id is None:
+            isbn = identifiers.get('isbn', None)
+            if isbn is not None:
+                ovrdrv_id = self.cached_isbn_to_identifier(isbn)
+        if ovrdrv_id is not None:
+            url = self.cached_identifier_to_cover_url(ovrdrv_id)
+
+        return url
+    # }}}
+
+    def create_query(self, title=None, authors=None, identifiers={}):
+        q = ''
+        if title or authors:
+            def build_term(prefix, parts):
+                return ' '.join('in'+prefix + ':' + x for x in parts)
+            title_tokens = list(self.get_title_tokens(title, False, True))
+            if title_tokens:
+                q += build_term('title', title_tokens)
+            author_tokens = self.get_author_tokens(authors,
+                    only_first_author=True)
+            if author_tokens:
+                q += ('+' if q else '') + build_term('author',
+                        author_tokens)
+    
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        if not q:
+            return None
+        return BASE_URL+urlencode({
+            'q':q,
+            })
+
+    def get_base_referer(self): # to be used for passing referrer headers to cover download
+        choices = [
+            'http://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
+            'http://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
+            'http://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
+            'http://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
+            'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
+        ]
+        return choices[random.randint(0, len(choices)-1)]
+    
+    def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
+        fix_slashes = re.compile(r'\\/')
+        thumbimage = fix_slashes.sub('/', thumbimage)
+        worldcatlink = fix_slashes.sub('/', worldcatlink)
+        cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
+        social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
+        series_num = ''
+        if not series:
+            if subtitle:
+                title = od_title+': '+subtitle
+            else:
+                title = od_title
+        else:
+            title = od_title
+            m = re.search("([0-9]+$)", subtitle)
+            if m:
+                series_num = float(m.group(1))
+        return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
+    
+    def safe_query(self, br, query_url, post=''):
+        '''
+        The query must be initialized by loading an empty search results page
+        this page attempts to set a cookie that Mechanize doesn't like
+        copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
+        '''
+        goodcookies = br._ua_handlers['_cookies'].cookiejar
+        clean_cj = mechanize.CookieJar()
+        cookies_to_copy = []
+        for cookie in goodcookies:
+            copied_cookie = copy.deepcopy(cookie)
+            cookies_to_copy.append(copied_cookie)
+        for copied_cookie in cookies_to_copy:
+            clean_cj.set_cookie(copied_cookie)
+
+        if post:
+            br.open_novisit(query_url, post)
+        else:
+            br.open_novisit(query_url)
+
+        br.set_cookiejar(clean_cj)
+
+
+    def overdrive_search(self, br, q, title, author):
+        # re-initialize the cookiejar to so that it's clean
+        clean_cj = mechanize.CookieJar()
+        br.set_cookiejar(clean_cj)
+        q_query = q+'default.aspx/SearchByKeyword'
+        q_init_search = q+'SearchResults.aspx'
+        # get first author as string - convert this to a proper cleanup function later
+        s = Source(None)
+        print "printing list with author "+str(author)+":"
+        author_tokens = list(s.get_author_tokens(author))
+        print list(author_tokens)
+        title_tokens = list(s.get_title_tokens(title, False, True))
+        print "there are "+str(len(title_tokens))+" title tokens"
+        for token in title_tokens:
+            print "cleaned up title token is: "+str(token)
+    
+        if len(title_tokens) >= len(author_tokens):
+            initial_q = ' '.join(title_tokens)
+            xref_q = '+'.join(author_tokens)
+        else:
+            initial_q = ' '.join(author_tokens)
+            xref_q = '+'.join(title_tokens)
+    
+        print "initial query is "+str(initial_q)
+        print "cross reference query is "+str(xref_q)
+        q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
+        query = '{"szKeyword":"'+initial_q+'"}'
+    
+        # main query, requires specific Content Type header
+        req = mechanize.Request(q_query)
+        req.add_header('Content-Type', 'application/json; charset=utf-8')
+        br.open_novisit(req, query)
+    
+        print "q_init_search is "+q_init_search
+        # initiate the search without messing up the cookiejar
+        self.safe_query(br, q_init_search)
+    
+        # get the search results object
+        results = False
+        while results == False:
+            xreq = mechanize.Request(q_xref)
+            xreq.add_header('X-Requested-With', 'XMLHttpRequest')
+            xreq.add_header('Referer', q_init_search)
+            xreq.add_header('Accept', 'application/json, text/javascript, */*')
+            raw = br.open_novisit(xreq).read()
+            print "overdrive search result is:\n"+raw
+            for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
+                if int(m.group('displayrecords')) >= 1:
+                    results = True
+                elif int(m.group('totalrecords')) >= 1:
+                    xref_q = ''
+                    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
+                elif int(m.group('totalrecords')) == 0:
+                    return ''
+
+        print "\n\nsorting results"
+        return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
+    
+    
+    def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+        print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
+        close_matches = []
+        raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
+        results = eval(raw)
+        print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
+        #print results
+        # The search results are either from a keyword search or a multi-format list from a single ID,
+        # sort through the results for closest match/format
+        if results:
+            for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
+                    thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
+                    availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
+                print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
+                if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
+                    print "overdrive id is not None, searching based on format type priority"
+                    return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
+                else:
+                    creators = creators.split(', ')
+                    print "split creators from results are: "+str(creators)
+                    # if an exact match in a preferred format occurs
+                    if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
+                        print "Got Exact Match!!!"
+                        return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
+                    else:
+                        close_title_match = False
+                        close_author_match = False
+                        print "format id is "+str(formatid)
+                        for token in title_tokens:
+                            print "attempting to find "+str(token)+" title token"
+                            if od_title.lower().find(token.lower()) != -1:
+                                print "matched token"
+                                close_title_match = True
+                            else:
+                                print "token didn't match"
+                                close_title_match = False
+                                break
+                        for token in author_tokens:
+                            print "attempting to find "+str(token)+" author token"
+                            if creators[0].lower().find(token.lower()) != -1:
+                                print "matched token"
+                                close_author_match = True
+                            else:
+                                print "token didn't match"
+                                close_author_match = False
+                                break
+                        if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+                            if subtitle and series:
+                                close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+                            else:
+                                close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+            if close_matches:
+                return close_matches[0]
+            else:
+                return ''
+        else:
+            return ''
+    
+    
+    def overdrive_get_record(self, br, q, ovrdrv_id):
+        search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
+        results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
+    
+        # get the base url to set the proper session cookie
+        br.open_novisit(q)
+    
+        # initialize the search
+        self.safe_query(br, search_url)
+    
+        # get the results
+        req = mechanize.Request(results_url)
+        req.add_header('X-Requested-With', 'XMLHttpRequest')
+        req.add_header('Referer', search_url)
+        req.add_header('Accept', 'application/json, text/javascript, */*')
+        raw = br.open_novisit(req)
+        raw = str(list(raw))
+        clean_cj = mechanize.CookieJar()
+        br.set_cookiejar(clean_cj)
+        return self.sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
+
+
+    def find_ovrdrv_data(self, br, title, author, isbn, ovrdrv_id=None):
+        print "in find_ovrdrv_data, title is "+str(title)+", author is "+str(author)+", overdrive id is "+str(ovrdrv_id)
+        q = base_url
+        if ovrdrv_id is None:
+           return self.overdrive_search(br, q, title, author)
+        else:
+           return self.overdrive_get_record(br, q, ovrdrv_id)
+
+
+
+    def to_ovrdrv_data(self, br, title=None, author=None, ovrdrv_id=None):
+        '''
+        Takes either a title/author combo or an Overdrive ID.  One of these
+        two must be passed to this function.
+        '''
+        print "starting to_ovrdrv_data"
+        if ovrdrv_id is not None:
+            with cache_lock:
+                ans = ovrdrv_data_cache.get(ovrdrv_id, None)
+            if ans:
+                print "inside to_ovrdrv_data, cache lookup successful, ans is "+str(ans)
+                return ans
+            elif ans is False:
+                print "inside to_ovrdrv_data, ans returned False"
+                return None
+            else:
+                ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
+        else:
+            try:
+                print "trying to retrieve data, running find_ovrdrv_data"
+                ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
+                print "ovrdrv_data is "+str(ovrdrv_data)
+            except:
+                import traceback
+                traceback.print_exc()
+                ovrdrv_data = None
+        print "writing results to ovrdrv_data cache"
+        with cache_lock:
+            ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
+
+        return ovrdrv_data if ovrdrv_data else False
+
+
+    def parse_search_results(self, ovrdrv_data, mi):
+        '''
+        Parse the formatted search results from the initial Overdrive query and
+        add the values to the metadta.
+        
+        The list object has these values:
+        [cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
+        publisher[5], creators[6], reserveid[7], title[8]]
+
+        '''
+        print "inside parse_search_results, writing the metadata results"
+        ovrdrv_id = ovrdrv_data[7]
+        mi.set_identifier('overdrive', ovrdrv_id)
+
+        if len(ovrdrv_data[3]) > 1:
+            mi.series = ovrdrv_data[3]
+            if ovrdrv_data[4]:
+                mi.series_index = ovrdrv_data[4]
+        mi.publisher = ovrdrv_data[5]
+        mi.authors = ovrdrv_data[6]
+        mi.title = ovrdrv_data[8]
+        cover_url = ovrdrv_data[0]
+        if cover_url:
+            self.cache_identifier_to_cover_url(ovrdrv_id,
+                    cover_url)
+
+
+    def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
+        try:
+            raw = br.open_novisit(metadata_url).read()
+        except Exception, e:
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return False
+            raise   
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            root = soupparser.fromstring(raw)
+        except:
+            return False
+
+        pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
+        lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
+        subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
+        ebook_isbn = root.xpath("//td/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
+        desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")
+
+        if pub_date:
+            from calibre.utils.date import parse_date
+            mi.pubdate = parse_date(pub_date[0].strip())
+        if lang:
+            mi.language = lang[0].strip()
+            print "languages is "+str(mi.language)
+        #if ebook_isbn:
+        #    print "ebook isbn is "+str(ebook_isbn[0])
+        #    isbn = check_isbn(ebook_isbn[0].strip())
+        #    if isbn:
+        #        self.cache_isbn_to_identifier(isbn, ovrdrv_id)
+        #        mi.isbn = isbn
+        if subjects:
+            mi.tags = [tag.strip() for tag in subjects[0].split(',')]
+            print "tags are "+str(mi.tags)
+        if desc:
+            desc = desc[0]
+            desc = html.tostring(desc, method='html', encoding=unicode).strip()
+            # remove all attributes from tags
+            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
+            # Remove comments
+            desc = re.sub(r'(?s)<!--.*?-->', '', desc)
+            mi.comments = sanitize_comments_html(desc)
+
+        return None
+
+
+def main(args=sys.argv):
+    print "running through main tests"
+    import tempfile, os, time
+    tdir = tempfile.gettempdir()
+    br = browser()
+    for ovrdrv_id, isbn, title, author in [
+            #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
+            #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
+            #(None, '9780061952838', 'The Two Towers (The Lord of the Rings, Book II)', ['J. R. R. Tolkien']), # Series test, book 2
+            #(None, '9780618153985', 'The Fellowship of the Ring (The Lord of the Rings, Part 1)', ['J.R.R. Tolkien']),
+            #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id
+            #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
+            #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN
+            #(None, None, 'On the Road', ['Jack Kerouac']), # Nonetype ISBN
+            #(None, '9780345435279', 'A Caress of Twilight', ['Laurell K. Hamilton']),
+            #(None, '9780606087230', 'The Omnivore\'s Dilemma : A Natural History of Four Meals', ['Michael Pollan']), # Subtitle colon
+            #(None, '9780061747649', 'Mental_Floss Presents: Condensed Knowledge', ['Will Pearson', 'Mangesh Hattikudur']),
+            #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
+            #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
+            #(None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
+            (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
+            ]:
+        cpath = os.path.join(tdir, title+'.jpg')
+        print "cpath is "+cpath
+        st = time.time()
+        curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
+        print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
+        if curl is None:
+            print 'No cover found for', title
+        else:
+            print "curl is "+curl
+            #open(cpath, 'wb').write(br.open_novisit(curl).read())
+            #print 'Cover for', title, 'saved to', cpath
+        st = time.time()
+        print get_social_metadata(title, author, isbn, ovrdrv_id)
+        print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())

From aa30f306b5f8894641bc5536559b427c02c5303d Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 18:17:54 +0800
Subject: [PATCH 17/30] ...

---
 .../ebooks/metadata/sources/overdrive.py      | 64 ++++---------------
 1 file changed, 14 insertions(+), 50 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index 6950711da4..dd2e8b2a85 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -45,7 +45,6 @@ class OverDrive(Source):
         isbn = identifiers.get('isbn', None)
 
         br = self.browser
-        print "in identify, calling to_ovrdrv_data"
         ovrdrv_data = self.to_ovrdrv_data(br, title, authors, ovrdrv_id)
         if ovrdrv_data:
             title = ovrdrv_data[8]
@@ -83,7 +82,6 @@ class OverDrive(Source):
         if cached_url is None:
             log.info('No cached cover found, running identify')
             rq = Queue()
-            print "inside download cover, calling identify"
             self.identify(log, rq, abort, title=title, authors=authors,
                     identifiers=identifiers)
             if abort.is_set():
@@ -110,7 +108,6 @@ class OverDrive(Source):
         ovrdrv_id = identifiers.get('overdrive', None)
         br = self.browser
         referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
-        print "downloading cover, referer is "+str(referer)
         req = mechanize.Request(cached_url)
         req.add_header('referer', referer)
         log('Downloading cover from:', cached_url)
@@ -124,7 +121,6 @@ class OverDrive(Source):
     def get_cached_cover_url(self, identifiers): # {{{
         url = None
         ovrdrv_id = identifiers.get('overdrive', None)
-        print "inside get_cached_cover_url, ovrdrv_id is "+str(ovrdrv_id)
         if ovrdrv_id is None:
             isbn = identifiers.get('isbn', None)
             if isbn is not None:
@@ -217,14 +213,9 @@ class OverDrive(Source):
         q_init_search = q+'SearchResults.aspx'
         # get first author as string - convert this to a proper cleanup function later
         s = Source(None)
-        print "printing list with author "+str(author)+":"
         author_tokens = list(s.get_author_tokens(author))
-        print list(author_tokens)
         title_tokens = list(s.get_title_tokens(title, False, True))
-        print "there are "+str(len(title_tokens))+" title tokens"
-        for token in title_tokens:
-            print "cleaned up title token is: "+str(token)
-    
+
         if len(title_tokens) >= len(author_tokens):
             initial_q = ' '.join(title_tokens)
             xref_q = '+'.join(author_tokens)
@@ -232,8 +223,6 @@ class OverDrive(Source):
             initial_q = ' '.join(author_tokens)
             xref_q = '+'.join(title_tokens)
     
-        print "initial query is "+str(initial_q)
-        print "cross reference query is "+str(xref_q)
         q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
         query = '{"szKeyword":"'+initial_q+'"}'
     
@@ -242,7 +231,6 @@ class OverDrive(Source):
         req.add_header('Content-Type', 'application/json; charset=utf-8')
         br.open_novisit(req, query)
     
-        print "q_init_search is "+q_init_search
         # initiate the search without messing up the cookiejar
         self.safe_query(br, q_init_search)
     
@@ -254,7 +242,6 @@ class OverDrive(Source):
             xreq.add_header('Referer', q_init_search)
             xreq.add_header('Accept', 'application/json, text/javascript, */*')
             raw = br.open_novisit(xreq).read()
-            print "overdrive search result is:\n"+raw
             for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
                 if int(m.group('displayrecords')) >= 1:
                     results = True
@@ -264,54 +251,40 @@ class OverDrive(Source):
                 elif int(m.group('totalrecords')) == 0:
                     return ''
 
-        print "\n\nsorting results"
         return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
     
     
     def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
-        print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
         close_matches = []
         raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
         results = eval(raw)
-        print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
-        #print results
+
         # The search results are either from a keyword search or a multi-format list from a single ID,
         # sort through the results for closest match/format
         if results:
             for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                     thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                     availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-                print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
                 if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
-                    print "overdrive id is not None, searching based on format type priority"
                     return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
                 else:
                     creators = creators.split(', ')
-                    print "split creators from results are: "+str(creators)
                     # if an exact match in a preferred format occurs
                     if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
-                        print "Got Exact Match!!!"
                         return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
                     else:
                         close_title_match = False
                         close_author_match = False
-                        print "format id is "+str(formatid)
                         for token in title_tokens:
-                            print "attempting to find "+str(token)+" title token"
                             if od_title.lower().find(token.lower()) != -1:
-                                print "matched token"
                                 close_title_match = True
                             else:
-                                print "token didn't match"
                                 close_title_match = False
                                 break
                         for token in author_tokens:
-                            print "attempting to find "+str(token)+" author token"
                             if creators[0].lower().find(token.lower()) != -1:
-                                print "matched token"
                                 close_author_match = True
                             else:
-                                print "token didn't match"
                                 close_author_match = False
                                 break
                         if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
@@ -350,7 +323,6 @@ class OverDrive(Source):
 
 
     def find_ovrdrv_data(self, br, title, author, isbn, ovrdrv_id=None):
-        print "in find_ovrdrv_data, title is "+str(title)+", author is "+str(author)+", overdrive id is "+str(ovrdrv_id)
         q = base_url
         if ovrdrv_id is None:
            return self.overdrive_search(br, q, title, author)
@@ -364,28 +336,22 @@ class OverDrive(Source):
         Takes either a title/author combo or an Overdrive ID.  One of these
         two must be passed to this function.
         '''
-        print "starting to_ovrdrv_data"
         if ovrdrv_id is not None:
             with cache_lock:
                 ans = ovrdrv_data_cache.get(ovrdrv_id, None)
             if ans:
-                print "inside to_ovrdrv_data, cache lookup successful, ans is "+str(ans)
                 return ans
             elif ans is False:
-                print "inside to_ovrdrv_data, ans returned False"
                 return None
             else:
                 ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
         else:
             try:
-                print "trying to retrieve data, running find_ovrdrv_data"
                 ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
-                print "ovrdrv_data is "+str(ovrdrv_data)
             except:
                 import traceback
                 traceback.print_exc()
                 ovrdrv_data = None
-        print "writing results to ovrdrv_data cache"
         with cache_lock:
             ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
 
@@ -402,7 +368,6 @@ class OverDrive(Source):
         publisher[5], creators[6], reserveid[7], title[8]]
 
         '''
-        print "inside parse_search_results, writing the metadata results"
         ovrdrv_id = ovrdrv_data[7]
         mi.set_identifier('overdrive', ovrdrv_id)
 
@@ -445,7 +410,7 @@ class OverDrive(Source):
             mi.pubdate = parse_date(pub_date[0].strip())
         if lang:
             mi.language = lang[0].strip()
-            print "languages is "+str(mi.language)
+
         #if ebook_isbn:
         #    print "ebook isbn is "+str(ebook_isbn[0])
         #    isbn = check_isbn(ebook_isbn[0].strip())
@@ -454,7 +419,7 @@ class OverDrive(Source):
         #        mi.isbn = isbn
         if subjects:
             mi.tags = [tag.strip() for tag in subjects[0].split(',')]
-            print "tags are "+str(mi.tags)
+
         if desc:
             desc = desc[0]
             desc = html.tostring(desc, method='html', encoding=unicode).strip()
@@ -468,7 +433,6 @@ class OverDrive(Source):
 
 
 def main(args=sys.argv):
-    print "running through main tests"
     import tempfile, os, time
     tdir = tempfile.gettempdir()
     br = browser()
@@ -490,19 +454,19 @@ def main(args=sys.argv):
             (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
             ]:
         cpath = os.path.join(tdir, title+'.jpg')
-        print "cpath is "+cpath
+        #print "cpath is "+cpath
         st = time.time()
         curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
-        print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
-        if curl is None:
-            print 'No cover found for', title
-        else:
-            print "curl is "+curl
-            #open(cpath, 'wb').write(br.open_novisit(curl).read())
-            #print 'Cover for', title, 'saved to', cpath
+        #print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
+        #if curl is None:
+        #    print 'No cover found for', title
+        #else:
+        #    print "curl is "+curl
+        #    open(cpath, 'wb').write(br.open_novisit(curl).read())
+        #    print 'Cover for', title, 'saved to', cpath
         st = time.time()
-        print get_social_metadata(title, author, isbn, ovrdrv_id)
-        print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
+        #print get_social_metadata(title, author, isbn, ovrdrv_id)
+        #print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
 
     return 0
 

From 2b82d4944859d22e56daf21f030586f4fb8977b3 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 20:56:11 +0800
Subject: [PATCH 18/30] fixed multiple author sorting

---
 .../ebooks/metadata/sources/overdrive.py      | 43 ++++++++++++++-----
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index dd2e8b2a85..42b320745a 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -212,10 +212,15 @@ class OverDrive(Source):
         q_query = q+'default.aspx/SearchByKeyword'
         q_init_search = q+'SearchResults.aspx'
         # get first author as string - convert this to a proper cleanup function later
-        s = Source(None)
-        author_tokens = list(s.get_author_tokens(author))
-        title_tokens = list(s.get_title_tokens(title, False, True))
-
+        print "printing list with author "+str(author)+":"
+        author_tokens = list(self.get_author_tokens(author,
+                only_first_author=True))
+        print list(author_tokens)
+        title_tokens = list(self.get_title_tokens(title, False, True))
+        print "there are "+str(len(title_tokens))+" title tokens"
+        for token in title_tokens:
+            print "cleaned up title token is: "+str(token)
+    
         if len(title_tokens) >= len(author_tokens):
             initial_q = ' '.join(title_tokens)
             xref_q = '+'.join(author_tokens)
@@ -251,41 +256,59 @@ class OverDrive(Source):
                 elif int(m.group('totalrecords')) == 0:
                     return ''
 
+        print "\n\nsorting results"
         return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
     
     
     def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+        print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
         close_matches = []
         raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
         results = eval(raw)
-
+        print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
+        #print results
         # The search results are either from a keyword search or a multi-format list from a single ID,
         # sort through the results for closest match/format
         if results:
             for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                     thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                     availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
+                print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
                 if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
+                    print "overdrive id is not None, searching based on format type priority"
                     return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
                 else:
                     creators = creators.split(', ')
+                    print "split creators from results are: "+str(creators)+", there are "+str(len(creators))+" total"
                     # if an exact match in a preferred format occurs
                     if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
+                        print "Got Exact Match!!!"
                         return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
                     else:
                         close_title_match = False
                         close_author_match = False
+                        print "format id is "+str(formatid)
                         for token in title_tokens:
+                            print "attempting to find "+str(token)+" title token"
                             if od_title.lower().find(token.lower()) != -1:
+                                print "matched token"
                                 close_title_match = True
                             else:
+                                print "token didn't match"
                                 close_title_match = False
                                 break
-                        for token in author_tokens:
-                            if creators[0].lower().find(token.lower()) != -1:
-                                close_author_match = True
-                            else:
-                                close_author_match = False
+                        for author in creators:
+                            print "matching tokens for "+str(author)
+                            for token in author_tokens:
+                                print "attempting to find "+str(token)+" author token"
+                                if author.lower().find(token.lower()) != -1:
+                                    print "matched token"
+                                    close_author_match = True
+                                else:
+                                    print "token didn't match"
+                                    close_author_match = False
+                                    break
+                            if close_author_match:
                                 break
                         if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
                             if subtitle and series:

From 7b196c762bb63b482bef111fcabe553d74ed8395 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 21:07:26 +0800
Subject: [PATCH 19/30] prioritized results with covers, cleaned up print
 statements

---
 .../ebooks/metadata/sources/overdrive.py      | 69 ++++++++-----------
 1 file changed, 27 insertions(+), 42 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index 42b320745a..4fc8dbab1b 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -55,9 +55,9 @@ class OverDrive(Source):
                 ovrdrv_id = ovrdrv_data[7]
             if isbn is not None:
                 self.cache_isbn_to_identifier(isbn, ovrdrv_id)
-    
+
             self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
-    
+
             result_queue.put(mi)
 
         return None
@@ -144,7 +144,7 @@ class OverDrive(Source):
             if author_tokens:
                 q += ('+' if q else '') + build_term('author',
                         author_tokens)
-    
+
         if isinstance(q, unicode):
             q = q.encode('utf-8')
         if not q:
@@ -162,7 +162,7 @@ class OverDrive(Source):
             'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
         ]
         return choices[random.randint(0, len(choices)-1)]
-    
+
     def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
         fix_slashes = re.compile(r'\\/')
         thumbimage = fix_slashes.sub('/', thumbimage)
@@ -181,7 +181,7 @@ class OverDrive(Source):
             if m:
                 series_num = float(m.group(1))
         return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
-    
+
     def safe_query(self, br, query_url, post=''):
         '''
         The query must be initialized by loading an empty search results page
@@ -212,33 +212,29 @@ class OverDrive(Source):
         q_query = q+'default.aspx/SearchByKeyword'
         q_init_search = q+'SearchResults.aspx'
         # get first author as string - convert this to a proper cleanup function later
-        print "printing list with author "+str(author)+":"
         author_tokens = list(self.get_author_tokens(author,
                 only_first_author=True))
-        print list(author_tokens)
-        title_tokens = list(self.get_title_tokens(title, False, True))
-        print "there are "+str(len(title_tokens))+" title tokens"
-        for token in title_tokens:
-            print "cleaned up title token is: "+str(token)
-    
+        title_tokens = list(self.get_title_tokens(title,
+                strip_joiners=False, strip_subtitle=True))
+
         if len(title_tokens) >= len(author_tokens):
             initial_q = ' '.join(title_tokens)
             xref_q = '+'.join(author_tokens)
         else:
             initial_q = ' '.join(author_tokens)
             xref_q = '+'.join(title_tokens)
-    
+
         q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
         query = '{"szKeyword":"'+initial_q+'"}'
-    
+
         # main query, requires specific Content Type header
         req = mechanize.Request(q_query)
         req.add_header('Content-Type', 'application/json; charset=utf-8')
         br.open_novisit(req, query)
-    
+
         # initiate the search without messing up the cookiejar
         self.safe_query(br, q_init_search)
-    
+
         # get the search results object
         results = False
         while results == False:
@@ -256,16 +252,13 @@ class OverDrive(Source):
                 elif int(m.group('totalrecords')) == 0:
                     return ''
 
-        print "\n\nsorting results"
         return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
-    
-    
+
+
     def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
-        print "\ntitle to search for is "+str(title)+"\nauthor to search for is "+str(author)
         close_matches = []
         raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
         results = eval(raw)
-        print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
         #print results
         # The search results are either from a keyword search or a multi-format list from a single ID,
         # sort through the results for closest match/format
@@ -273,44 +266,36 @@ class OverDrive(Source):
             for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                     thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                     availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-                print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
+                #print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
                 if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
-                    print "overdrive id is not None, searching based on format type priority"
-                    return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)            
+                    #print "overdrive id is not None, searching based on format type priority"
+                    return self.format_results(reserveid, od_title, subtitle, series, publisher,
+                            creators, thumbimage, worldcatlink, formatid)            
                 else:
                     creators = creators.split(', ')
-                    print "split creators from results are: "+str(creators)+", there are "+str(len(creators))+" total"
                     # if an exact match in a preferred format occurs
                     if creators[0] == author[0] and od_title == title and int(formatid) in [1, 50, 410, 900]:
-                        print "Got Exact Match!!!"
-                        return self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid)
+                        return self.format_results(reserveid, od_title, subtitle, series, publisher,
+                                creators, thumbimage, worldcatlink, formatid)
                     else:
                         close_title_match = False
                         close_author_match = False
-                        print "format id is "+str(formatid)
                         for token in title_tokens:
-                            print "attempting to find "+str(token)+" title token"
                             if od_title.lower().find(token.lower()) != -1:
-                                print "matched token"
                                 close_title_match = True
                             else:
-                                print "token didn't match"
                                 close_title_match = False
                                 break
                         for author in creators:
-                            print "matching tokens for "+str(author)
                             for token in author_tokens:
-                                print "attempting to find "+str(token)+" author token"
                                 if author.lower().find(token.lower()) != -1:
-                                    print "matched token"
                                     close_author_match = True
                                 else:
-                                    print "token didn't match"
                                     close_author_match = False
                                     break
                             if close_author_match:
                                 break
-                        if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+                        if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900] and thumbimage:
                             if subtitle and series:
                                 close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
                             else:
@@ -321,18 +306,18 @@ class OverDrive(Source):
                 return ''
         else:
             return ''
-    
-    
+
+
     def overdrive_get_record(self, br, q, ovrdrv_id):
         search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
         results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
-    
+
         # get the base url to set the proper session cookie
         br.open_novisit(q)
-    
+
         # initialize the search
         self.safe_query(br, search_url)
-    
+
         # get the results
         req = mechanize.Request(results_url)
         req.add_header('X-Requested-With', 'XMLHttpRequest')
@@ -385,7 +370,7 @@ class OverDrive(Source):
         '''
         Parse the formatted search results from the initial Overdrive query and
         add the values to the metadta.
-        
+
         The list object has these values:
         [cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
         publisher[5], creators[6], reserveid[7], title[8]]

From f7535a51edf98b802218bffd2d2eccbbc5bf913f Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 21:30:35 +0800
Subject: [PATCH 20/30] re-initialize the cookie jar for book records that
 already have an overdrive identifier

---
 src/calibre/ebooks/metadata/sources/overdrive.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index 4fc8dbab1b..1b237ad683 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -312,6 +312,9 @@ class OverDrive(Source):
         search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
         results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
 
+        # re-initialize the cookiejar so that it's clean
+        clean_cj = mechanize.CookieJar()
+        br.set_cookiejar(clean_cj)
         # get the base url to set the proper session cookie
         br.open_novisit(q)
 

From 0991d2ca3ccccf5a9226007e9fd8d7eafdcd624d Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Mon, 18 Apr 2011 22:53:19 +0800
Subject: [PATCH 21/30] get rid of test code that applied to the old plugin,
 set defaults

---
 .../ebooks/metadata/sources/overdrive.py      | 40 +++----------------
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index 1b237ad683..56a905de03 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -39,6 +39,11 @@ class OverDrive(Source):
     supports_gzip_transfer_encoding = False
     cached_cover_url_is_reliable = True
 
+    def __init__(self, *args, **kwargs):
+       Source.__init__(self, *args, **kwargs)
+       self.prefs.defaults['ignore_fields'] =['tags', 'pubdate', 'comments', 'identifier:isbn', 'language']
+
+
     def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=30):
         ovrdrv_id = identifiers.get('overdrive', None)
@@ -444,41 +449,6 @@ class OverDrive(Source):
 
 
 def main(args=sys.argv):
-    import tempfile, os, time
-    tdir = tempfile.gettempdir()
-    br = browser()
-    for ovrdrv_id, isbn, title, author in [
-            #(None, '0899661343', 'On the Road', ['Jack Kerouac']), # basic test, no series, single author
-            #(None, '9780061952838', 'The Fellowship of the Ring', ['J. R. R. Tolkien']), # Series test, multi-author
-            #(None, '9780061952838', 'The Two Towers (The Lord of the Rings, Book II)', ['J. R. R. Tolkien']), # Series test, book 2
-            #(None, '9780618153985', 'The Fellowship of the Ring (The Lord of the Rings, Part 1)', ['J.R.R. Tolkien']),
-            #('57844706-20fa-4ace-b5ee-3470b1b52173', None, 'The Two Towers', ['J. R. R. Tolkien']), # Series test, w/ ovrdrv id
-            #(None, '9780345505057', 'Deluge', ['Anne McCaffrey']) # Multiple authors
-            #(None, None, 'Deluge', ['Anne McCaffrey']) # Empty ISBN
-            #(None, None, 'On the Road', ['Jack Kerouac']), # Nonetype ISBN
-            #(None, '9780345435279', 'A Caress of Twilight', ['Laurell K. Hamilton']),
-            #(None, '9780606087230', 'The Omnivore\'s Dilemma : A Natural History of Four Meals', ['Michael Pollan']), # Subtitle colon
-            #(None, '9780061747649', 'Mental_Floss Presents: Condensed Knowledge', ['Will Pearson', 'Mangesh Hattikudur']),
-            #(None, '9781400050802', 'The Zombie Survival Guide', ['Max Brooks']), # Two books with this title by this author
-            #(None, '9781775414315', 'The Worst Journey in the World / Antarctic 1910-1913', ['Apsley Cherry-Garrard']), # Garbage sub-title
-            #(None, '9780440335160', 'Outlander', ['Diana Gabaldon']), # Returns lots of results to sort through to get the best match
-            (None, '9780345509741', 'The Horror Stories of Robert E. Howard', ['Robert E. Howard']), # Complex title with initials/dots stripped, some results don't have a cover
-            ]:
-        cpath = os.path.join(tdir, title+'.jpg')
-        #print "cpath is "+cpath
-        st = time.time()
-        curl = get_cover_url(isbn, title, author, br, ovrdrv_id)
-        #print '\n\n Took ', time.time() - st, ' to get basic metadata\n\n'
-        #if curl is None:
-        #    print 'No cover found for', title
-        #else:
-        #    print "curl is "+curl
-        #    open(cpath, 'wb').write(br.open_novisit(curl).read())
-        #    print 'Cover for', title, 'saved to', cpath
-        st = time.time()
-        #print get_social_metadata(title, author, isbn, ovrdrv_id)
-        #print '\n\n Took ', time.time() - st, ' to get detailed metadata\n\n'
-
     return 0
 
 if __name__ == '__main__':

From 9278da958c420cfe4cfaeb37b5302ccef8d1c358 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 11:06:19 -0600
Subject: [PATCH 22/30] Bulk metadata download: Make the confirm dialog more
 useful

---
 src/calibre/gui2/actions/edit_metadata.py   | 10 +--
 src/calibre/gui2/metadata/bulk_download2.py | 81 +++++++++++++++++----
 2 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 18a73fb282..9d4d3891ca 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -37,8 +37,6 @@ class EditMetadataAction(InterfaceAction):
         md.addSeparator()
         if test_eight_code:
             dall = self.download_metadata
-            dident = partial(self.download_metadata, covers=False)
-            dcovers = partial(self.download_metadata, identify=False)
         else:
             dall = partial(self.download_metadata_old, False, covers=True)
             dident = partial(self.download_metadata_old, False, covers=False)
@@ -47,9 +45,9 @@ class EditMetadataAction(InterfaceAction):
 
         md.addAction(_('Download metadata and covers'), dall,
                 Qt.ControlModifier+Qt.Key_D)
-        md.addAction(_('Download only metadata'), dident)
-        md.addAction(_('Download only covers'), dcovers)
         if not test_eight_code:
+            md.addAction(_('Download only metadata'), dident)
+            md.addAction(_('Download only covers'), dcovers)
             md.addAction(_('Download only social metadata'),
                 partial(self.download_metadata_old, False, covers=False,
                     set_metadata=False, set_social_metadata=True))
@@ -80,7 +78,7 @@ class EditMetadataAction(InterfaceAction):
         self.qaction.setEnabled(enabled)
         self.action_merge.setEnabled(enabled)
 
-    def download_metadata(self, identify=True, covers=True, ids=None):
+    def download_metadata(self, ids=None):
         if ids is None:
             rows = self.gui.library_view.selectionModel().selectedRows()
             if not rows or len(rows) == 0:
@@ -90,7 +88,7 @@ class EditMetadataAction(InterfaceAction):
             ids = [db.id(row.row()) for row in rows]
         from calibre.gui2.metadata.bulk_download2 import start_download
         start_download(self.gui, ids,
-                Dispatcher(self.bulk_metadata_downloaded), identify, covers)
+                Dispatcher(self.bulk_metadata_downloaded))
 
     def bulk_metadata_downloaded(self, job):
         if job.failed:
diff --git a/src/calibre/gui2/metadata/bulk_download2.py b/src/calibre/gui2/metadata/bulk_download2.py
index 5f0af1b316..11cbc65680 100644
--- a/src/calibre/gui2/metadata/bulk_download2.py
+++ b/src/calibre/gui2/metadata/bulk_download2.py
@@ -12,7 +12,8 @@ from functools import partial
 from itertools import izip
 
 from PyQt4.Qt import (QIcon, QDialog, QVBoxLayout, QTextBrowser, QSize,
-        QDialogButtonBox, QApplication, QTimer, QLabel, QProgressBar)
+        QDialogButtonBox, QApplication, QTimer, QLabel, QProgressBar,
+        QGridLayout, QPixmap, Qt)
 
 from calibre.gui2.dialogs.message_box import MessageBox
 from calibre.gui2.threaded_jobs import ThreadedJob
@@ -25,37 +26,86 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.customize.ui import metadata_plugins
 from calibre.ptempfile import PersistentTemporaryFile
 
+# Start download {{{
 def show_config(gui, parent):
     from calibre.gui2.preferences import show_config_widget
     show_config_widget('Sharing', 'Metadata download', parent=parent,
             gui=gui, never_shutdown=True)
 
-def start_download(gui, ids, callback, identify, covers):
-    q = MessageBox(MessageBox.QUESTION,  _('Schedule download?'),
+class ConfirmDialog(QDialog):
+
+    def __init__(self, ids, parent):
+        QDialog.__init__(self, parent)
+        self.setWindowTitle(_('Schedule download?'))
+        self.setWindowIcon(QIcon(I('dialog_question.png')))
+
+        l = self.l = QGridLayout()
+        self.setLayout(l)
+
+        i = QLabel(self)
+        i.setPixmap(QPixmap(I('dialog_question.png')))
+        l.addWidget(i, 0, 0)
+
+        t = QLabel(
             '<p>'+_('The download of metadata for the <b>%d selected book(s)</b> will'
                 ' run in the background. Proceed?')%len(ids) +
             '<p>'+_('You can monitor the progress of the download '
                 'by clicking the rotating spinner in the bottom right '
                 'corner.') +
             '<p>'+_('When the download completes you will be asked for'
-                ' confirmation before calibre applies the downloaded metadata.'),
-            show_copy_button=False, parent=gui)
-    b = q.bb.addButton(_('Configure download'), q.bb.ActionRole)
-    b.setIcon(QIcon(I('config.png')))
-    b.clicked.connect(partial(show_config, gui, q))
-    q.det_msg_toggle.setVisible(False)
+                ' confirmation before calibre applies the downloaded metadata.')
+            )
+        t.setWordWrap(True)
+        l.addWidget(t, 0, 1)
+        l.setColumnStretch(0, 1)
+        l.setColumnStretch(1, 100)
 
-    ret = q.exec_()
-    b.clicked.disconnect()
-    if ret != q.Accepted:
+        self.identify = self.covers = True
+        self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
+        self.bb.rejected.connect(self.reject)
+        b = self.bb.addButton(_('Download only metadata'),
+                self.bb.AcceptRole)
+        b.clicked.connect(self.only_metadata)
+        b.setIcon(QIcon(I('edit_input.png')))
+        b = self.bb.addButton(_('Download only covers'),
+                self.bb.AcceptRole)
+        b.clicked.connect(self.only_covers)
+        b.setIcon(QIcon(I('default_cover.png')))
+        b = self.b = self.bb.addButton(_('Configure download'), self.bb.ActionRole)
+        b.setIcon(QIcon(I('config.png')))
+        b.clicked.connect(partial(show_config, parent, self))
+        l.addWidget(self.bb, 1, 0, 1, 2)
+        b = self.bb.addButton(_('Download both'),
+                self.bb.AcceptRole)
+        b.clicked.connect(self.accept)
+        b.setDefault(True)
+        b.setAutoDefault(True)
+        b.setIcon(QIcon(I('ok.png')))
+
+        self.resize(self.sizeHint())
+        b.setFocus(Qt.OtherFocusReason)
+
+    def only_metadata(self):
+        self.covers = False
+        self.accept()
+
+    def only_covers(self):
+        self.identify = False
+        self.accept()
+
+def start_download(gui, ids, callback):
+    d = ConfirmDialog(ids, gui)
+    ret = d.exec_()
+    d.b.clicked.disconnect()
+    if ret != d.Accepted:
         return
 
     job = ThreadedJob('metadata bulk download',
             _('Download metadata for %d books')%len(ids),
-            download, (ids, gui.current_db, identify, covers), {}, callback)
+            download, (ids, gui.current_db, d.identify, d.covers), {}, callback)
     gui.job_manager.run_threaded_job(job)
     gui.status_bar.show_message(_('Metadata download started'), 3000)
-
+# }}}
 
 class ViewLog(QDialog): # {{{
 
@@ -93,6 +143,7 @@ def view_log(job, parent):
 
 # }}}
 
+# Apply downloaded metadata {{{
 class ApplyDialog(QDialog):
 
     def __init__(self, id_map, gui):
@@ -248,6 +299,8 @@ def proceed(gui, job):
     q.show()
     q.finished.connect(partial(apply_metadata, job, gui, q))
 
+# }}}
+
 def merge_result(oldmi, newmi):
     dummy = Metadata(_('Unknown'))
     for f in msprefs['ignore_fields']:

From ab1ad20dba92e3be931a80ae2eaf50625341564c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 11:11:11 -0600
Subject: [PATCH 23/30] ...

---
 src/calibre/gui2/metadata/bulk_download2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/metadata/bulk_download2.py b/src/calibre/gui2/metadata/bulk_download2.py
index 11cbc65680..a95c8b52c7 100644
--- a/src/calibre/gui2/metadata/bulk_download2.py
+++ b/src/calibre/gui2/metadata/bulk_download2.py
@@ -63,19 +63,19 @@ class ConfirmDialog(QDialog):
         self.identify = self.covers = True
         self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
         self.bb.rejected.connect(self.reject)
-        b = self.bb.addButton(_('Download only metadata'),
+        b = self.bb.addButton(_('Download only &metadata'),
                 self.bb.AcceptRole)
         b.clicked.connect(self.only_metadata)
         b.setIcon(QIcon(I('edit_input.png')))
-        b = self.bb.addButton(_('Download only covers'),
+        b = self.bb.addButton(_('Download only &covers'),
                 self.bb.AcceptRole)
         b.clicked.connect(self.only_covers)
         b.setIcon(QIcon(I('default_cover.png')))
-        b = self.b = self.bb.addButton(_('Configure download'), self.bb.ActionRole)
+        b = self.b = self.bb.addButton(_('&Configure download'), self.bb.ActionRole)
         b.setIcon(QIcon(I('config.png')))
         b.clicked.connect(partial(show_config, parent, self))
         l.addWidget(self.bb, 1, 0, 1, 2)
-        b = self.bb.addButton(_('Download both'),
+        b = self.bb.addButton(_('Download &both'),
                 self.bb.AcceptRole)
         b.clicked.connect(self.accept)
         b.setDefault(True)

From 8611632ea4a110f0b2003b994dd05bab96d29597 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 11:51:09 -0600
Subject: [PATCH 24/30] Nicer implementation of apply metadata dialog

---
 src/calibre/gui2/metadata/bulk_download2.py | 40 ++++++++++++---------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/calibre/gui2/metadata/bulk_download2.py b/src/calibre/gui2/metadata/bulk_download2.py
index a95c8b52c7..4aa4561078 100644
--- a/src/calibre/gui2/metadata/bulk_download2.py
+++ b/src/calibre/gui2/metadata/bulk_download2.py
@@ -146,7 +146,7 @@ def view_log(job, parent):
 # Apply downloaded metadata {{{
 class ApplyDialog(QDialog):
 
-    def __init__(self, id_map, gui):
+    def __init__(self, gui):
         QDialog.__init__(self, gui)
 
         self.l = l = QVBoxLayout()
@@ -155,27 +155,33 @@ class ApplyDialog(QDialog):
 
         self.pb = QProgressBar(self)
         l.addWidget(self.pb)
-        self.pb.setMinimum(0)
-        self.pb.setMaximum(len(id_map))
 
         self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
         self.bb.rejected.connect(self.reject)
-        self.bb.accepted.connect(self.accept)
         l.addWidget(self.bb)
 
         self.gui = gui
+        self.timer = QTimer(self)
+        self.timer.timeout.connect(self.do_one)
+
+    def start(self, id_map):
         self.id_map = list(id_map.iteritems())
         self.current_idx = 0
-
         self.failures = []
         self.ids = []
         self.canceled = False
-
-        QTimer.singleShot(20, self.do_one)
+        self.pb.setMinimum(0)
+        self.pb.setMaximum(len(id_map))
+        self.timer.start(50)
 
     def do_one(self):
         if self.canceled:
             return
+        if self.current_idx >= len(self.id_map):
+            self.timer.stop()
+            self.finalize()
+            return
+
         i, mi = self.id_map[self.current_idx]
         db = self.gui.current_db
         try:
@@ -195,15 +201,11 @@ class ApplyDialog(QDialog):
             pass
 
         self.pb.setValue(self.pb.value()+1)
-
-        if self.current_idx >= len(self.id_map) - 1:
-            self.finalize()
-        else:
-            self.current_idx += 1
-            QTimer.singleShot(20, self.do_one)
+        self.current_idx += 1
 
     def reject(self):
         self.canceled = True
+        self.timer.stop()
         QDialog.reject(self)
 
     def finalize(self):
@@ -220,17 +222,18 @@ class ApplyDialog(QDialog):
                     title += ' - ' + authors_to_string(authors)
                 msg.append(title+'\n\n'+tb+'\n'+('*'*80))
 
-            error_dialog(self, _('Some failures'),
+            parent = self if self.isVisible() else self.parent()
+            error_dialog(parent, _('Some failures'),
                 _('Failed to apply updated metadata for some books'
                     ' in your library. Click "Show Details" to see '
                     'details.'), det_msg='\n\n'.join(msg), show=True)
-        self.accept()
         if self.ids:
             cr = self.gui.library_view.currentIndex().row()
             self.gui.library_view.model().refresh_ids(
                 self.ids, cr)
             if self.gui.cover_flow:
                 self.gui.cover_flow.dataChanged()
+        self.accept()
 
 _amd = None
 def apply_metadata(job, gui, q, result):
@@ -268,8 +271,11 @@ def apply_metadata(job, gui, q, result):
                     'Do you want to proceed?'), det_msg='\n'.join(modified)):
             return
 
-    _amd = ApplyDialog(id_map, gui)
-    _amd.exec_()
+    if _amd is None:
+        _amd = ApplyDialog(gui)
+    _amd.start(id_map)
+    if len(id_map) > 3:
+        _amd.exec_()
 
 def proceed(gui, job):
     gui.status_bar.show_message(_('Metadata download completed'), 3000)

From b0ec35f0d310d861aa72a423a6337acd0bb25da5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 14:06:07 -0600
Subject: [PATCH 25/30] ...

---
 src/calibre/gui2/metadata/single_download.py | 11 ++++-------
 src/calibre/gui2/preferences/__init__.py     |  8 +++++++-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
index 7e30f02420..c4e13a90f8 100644
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -30,7 +30,6 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.gui2 import error_dialog, NONE
 from calibre.utils.date import utcnow, fromordinal, format_date
 from calibre.library.comments import comments_to_html
-from calibre.constants import islinux
 from calibre import force_unicode
 # }}}
 
@@ -117,12 +116,10 @@ class CoverDelegate(QStyledItemDelegate): # {{{
 
     def paint(self, painter, option, index):
         QStyledItemDelegate.paint(self, painter, option, index)
-        if islinux:
-            # On linux for some reason the selected color is drawn on top of
-            # the decoration
-            style = QApplication.style()
-            style.drawItemPixmap(painter, option.rect, Qt.AlignTop|Qt.AlignHCenter,
-                QPixmap(index.data(Qt.DecorationRole)))
+        # Ensure the cover is rendered over any selection rect
+        style = QApplication.style()
+        style.drawItemPixmap(painter, option.rect, Qt.AlignTop|Qt.AlignHCenter,
+            QPixmap(index.data(Qt.DecorationRole)))
         if self.timer.isActive() and index.data(Qt.UserRole).toBool():
             rect = QRect(0, 0, self.spinner_width, self.spinner_width)
             rect.moveCenter(option.rect.center())
diff --git a/src/calibre/gui2/preferences/__init__.py b/src/calibre/gui2/preferences/__init__.py
index 649a58448d..5b0a05ba40 100644
--- a/src/calibre/gui2/preferences/__init__.py
+++ b/src/calibre/gui2/preferences/__init__.py
@@ -337,7 +337,13 @@ def show_config_widget(category, name, gui=None, show_restart_msg=False,
     bb.button(bb.RestoreDefaults).setEnabled(w.supports_restoring_to_defaults)
     bb.button(bb.Apply).setEnabled(False)
     bb.button(bb.Apply).clicked.connect(d.accept)
-    w.changed_signal.connect(lambda : bb.button(bb.Apply).setEnabled(True))
+    def onchange():
+        b = bb.button(bb.Apply)
+        b.setEnabled(True)
+        b.setDefault(True)
+        b.setAutoDefault(True)
+    w.changed_signal.connect(onchange)
+    bb.button(bb.Cancel).setFocus(True)
     l = QVBoxLayout()
     d.setLayout(l)
     l.addWidget(w)

From 97c5bf39c13ec466712869526bc82d9f4566ef62 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 14:08:03 -0600
Subject: [PATCH 26/30] ...

---
 src/calibre/devices/android/driver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 44d9bc1e49..7fe246f450 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -108,10 +108,10 @@ class ANDROID(USBMS):
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
             '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-            'MB860', 'MULTI-CARD', 'MID7015A']
+            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
-            'A70S', 'A101IT', '7']
+            'A70S', 'A101IT', '7', 'INCREDIBLE']
 
     OSX_MAIN_MEM = 'Android Device Main Memory'
 

From b79faeff5691fb11e110c24bad69cb60fc05ce82 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 18 Apr 2011 21:38:27 +0100
Subject: [PATCH 27/30] Change author_sort_copy_method default from invert to
 comma.

---
 resources/default_tweaks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index c4c951f980..091aa9a34d 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -48,7 +48,7 @@ authors_completer_append_separator = False
 # When this tweak is changed, the author_sort values stored with each author
 # must be recomputed by right-clicking on an author in the left-hand tags pane,
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
-author_sort_copy_method = 'invert'
+author_sort_copy_method = 'comma'
 
 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,

From b6f44d0b7c0de9b2b9a6bfbb29d2874ea9718e7b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 18 Apr 2011 18:52:14 -0400
Subject: [PATCH 28/30] Store: Search allows for main window location and
 boolean filtering.

---
 src/calibre/gui2/store/search.py | 91 +++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/store/search.py b/src/calibre/gui2/store/search.py
index 1d263959ef..ce74d52547 100644
--- a/src/calibre/gui2/store/search.py
+++ b/src/calibre/gui2/store/search.py
@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
 
 import re
 import time
+import traceback
 from contextlib import closing
 from random import shuffle
 from threading import Thread
@@ -20,9 +21,12 @@ from calibre import browser
 from calibre.gui2 import NONE
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.gui2.store.search_ui import Ui_Dialog
+from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
+    REGEXP_MATCH
 from calibre.utils.config import DynamicConfig
 from calibre.utils.icu import sort_key
 from calibre.utils.magick.draw import thumbnail
+from calibre.utils.search_query_parser import SearchQueryParser
 
 HANG_TIME = 75000 # milliseconds seconds
 TIMEOUT = 75 # seconds
@@ -290,11 +294,15 @@ class SearchThread(Thread):
         while self._run and not self.tasks.empty():
             try:
                 query, store_name, store_plugin, timeout = self.tasks.get()
-                for res in store_plugin.search(query, timeout=timeout):
+                squery = query
+                for loc in SearchFilter.USABLE_LOCATIONS:
+                    squery = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', squery)
+                for res in store_plugin.search(squery, timeout=timeout):
                     if not self._run:
                         return
                     res.store_name = store_name
-                    self.results.put(res)
+                    if SearchFilter(res).parse(query):
+                        self.results.put(res)
                 self.tasks.task_done()
             except:
                 pass
@@ -450,3 +458,82 @@ class Matches(QAbstractItemModel):
         if reset:
             self.reset()
 
+
+class SearchFilter(SearchQueryParser):
+    
+    USABLE_LOCATIONS = [
+        'all',
+        'author',
+        'authors',
+        'cover',
+        'price',
+        'title',
+        'store',
+    ]
+
+    def __init__(self, search_result):
+        SearchQueryParser.__init__(self, locations=self.USABLE_LOCATIONS)
+        self.search_result = search_result
+
+    def universal_set(self):
+        return set([self.search_result])
+
+    def get_matches(self, location, query):
+        location = location.lower().strip()
+        if location == 'authors':
+            location = 'author'
+
+        matchkind = CONTAINS_MATCH
+        if len(query) > 1:
+            if query.startswith('\\'):
+                query = query[1:]
+            elif query.startswith('='):
+                matchkind = EQUALS_MATCH
+                query = query[1:]
+            elif query.startswith('~'):
+                matchkind = REGEXP_MATCH
+                query = query[1:]
+        if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
+            query = query.lower()
+
+        if location not in self.USABLE_LOCATIONS:
+            return set([])
+        matches = set([])
+        all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
+        locations = all_locs if location == 'all' else [location]
+        q = {
+             'author': self.search_result.author.lower(),
+             'cover': self.search_result.cover_url,
+             'format': '',
+             'price': self.search_result.price,
+             'store': self.search_result.store_name.lower(),
+             'title': self.search_result.title.lower(),
+        }
+        for x in ('author', 'format'):
+            q[x+'s'] = q[x]
+        for locvalue in locations:
+            ac_val = q[locvalue]
+            if query == 'true':
+                if ac_val is not None:
+                    matches.add(self.search_result)
+                continue
+            if query == 'false':
+                if ac_val is None:
+                    matches.add(self.search_result)
+                continue
+            try:
+                ### Can't separate authors because comma is used for name sep and author sep
+                ### Exact match might not get what you want. For that reason, turn author
+                ### exactmatch searches into contains searches.
+                if locvalue == 'author' and matchkind == EQUALS_MATCH:
+                    m = CONTAINS_MATCH
+                else:
+                    m = matchkind
+
+                vals = [ac_val]
+                if _match(query, vals, m):
+                    matches.add(self.search_result)
+                    break
+            except ValueError: # Unicode errors
+                traceback.print_exc()
+        return matches

From 234248cd23826764240e6ea1a4ac91c02cc23371 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 18 Apr 2011 18:57:28 -0400
Subject: [PATCH 29/30] Store: Fix issue with using proxy when an argument is
 None.

---
 src/calibre/gui2/store/web_control.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py
index 874328f872..0b79c526a8 100644
--- a/src/calibre/gui2/store/web_control.py
+++ b/src/calibre/gui2/store/web_control.py
@@ -31,10 +31,14 @@ class NPWebView(QWebView):
             proxy_parts = urlparse(http_proxy)
             proxy = QNetworkProxy()
             proxy.setType(QNetworkProxy.HttpProxy)
-            proxy.setUser(proxy_parts.username)
-            proxy.setPassword(proxy_parts.password)
-            proxy.setHostName(proxy_parts.hostname)
-            proxy.setPort(proxy_parts.port)
+            if proxy_parts.username:
+                proxy.setUser(proxy_parts.username)
+            if proxy_parts.password:
+                proxy.setPassword(proxy_parts.password)
+            if proxy_parts.hostname:
+                proxy.setHostName(proxy_parts.hostname)
+            if proxy_parts.port:
+                proxy.setPort(proxy_parts.port)
             self.page().networkAccessManager().setProxy(proxy)
 
         self.page().setForwardUnsupportedContent(True)

From 8d174eaffdfcda971885b80e0705bd221ea11f79 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Apr 2011 20:56:56 -0600
Subject: [PATCH 30/30] ...

---
 src/calibre/ebooks/metadata/sources/base.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 37407a0656..86a9fe1133 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -377,8 +377,9 @@ class Source(Plugin):
         This URL must be browseable to by a human using a browser. It is meant
         to provide a clickable link for the user to easily visit the books page
         at this source.
-        If no URL is found, return None. This method must be quick, either it
-        should construct the URL using a known URL scheme or use a cached URL.
+        If no URL is found, return None. This method must be quick and
+        consistent, so only implement it if it is possible to construct the
+        URL from a known scheme given the identifiers.
         '''
         return None