From 6df5c994d9955e7e0de057435a7703c2c378d6c1 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 3 Jul 2011 11:40:24 -0400
Subject: [PATCH] Store: Gutenberg, rewrite plugin to use gutenberg's search
 and allow for direct downloading.

---
 .../gui2/store/stores/gutenberg_plugin.py     | 70 +++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py
index 85d1f3966a..ad30f2067d 100644
--- a/src/calibre/gui2/store/stores/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py
@@ -6,6 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
+import mimetypes
 import urllib
 from contextlib import closing
 
@@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class GutenbergStore(BasicStoreConfig, StorePlugin):
         
     def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://m.gutenberg.org/'
-        ext_url = 'http://gutenberg.org/'
+        url = 'http://gutenberg.org/'
+        
+        if detail_item:
+            detail_item = url_slash_cleaner(url + detail_item)
 
         if external or self.config.get('open_external', False):
-            if detail_item:
-                ext_url = ext_url + detail_item
-            open_url(QUrl(url_slash_cleaner(ext_url)))
+            open_url(QUrl(detail_item if detail_item else url))
         else:
-            detail_url = None
-            if detail_item:
-                detail_url = url + detail_item
-            d = WebStoreDialog(self.gui, url, parent, detail_url)
+            d = WebStoreDialog(self.gui, url, parent, detail_item)
             d.setWindowTitle(self.name)
             d.set_tags(self.config.get('tags', ''))
             d.exec_()
 
     def search(self, query, max_results=10, timeout=60):
-        # Gutenberg's website does not allow searching both author and title.
-        # Using a google search so we can search on both fields at once.
-        url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query)
+        url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)
         
         br = browser()
         
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
             doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
+            for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'):
                 if counter <= 0:
                     break
+
+                id = ''.join(data.xpath('./a/@href'))
+                id = id.split('.mobile')[0]
                 
-                url = ''
-                url_a = data.xpath('div[@class="jd"]/a')
-                if url_a:
-                    url_a = url_a[0]
-                    url = url_a.get('href', None)
-                if url:
-                    url = url.split('u=')[-1].split('&')[0]
-                if '/ebooks/' not in url:
-                    continue
-                id = url.split('/')[-1]
-                
-                url_a = html.fromstring(html.tostring(url_a))
-                heading = ''.join(url_a.xpath('//text()'))
-                title, _, author = heading.rpartition('by ')
-                author = author.split('-')[0]
-                price = '$0.00'
+                title = ''.join(data.xpath('.//span[@class="title"]/text()'))
+                author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))
                 
                 counter -= 1
                 
                 s = SearchResult()
                 s.cover_url = ''
+                
+                s.detail_item = id.strip()
                 s.title = title.strip()
                 s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = '/ebooks/' + id.strip()
+                s.price = '$0.00'
                 s.drm = SearchResult.DRM_UNLOCKED
                 
                 yield s
 
     def get_details(self, search_result, timeout):
-        url = 'http://m.gutenberg.org/'
+        url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile')
         
         br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))
-        return True
\ No newline at end of file
+        with closing(br.open(url, timeout=timeout)) as nf:
+            doc = html.fromstring(nf.read())
+            
+            for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'):
+                type = save_item.get('type')
+                href = save_item.get('href')
+                
+                if type:
+                    ext = mimetypes.guess_extension(type)
+                    if ext:
+                        ext = ext[1:].upper().strip()
+                        search_result.downloads[ext] = href
+
+                search_result.formats = ', '.join(search_result.downloads.keys())
+
+        return True