Store: Gutenberg, rewrite plugin to use gutenberg's search and allow for direct downloading.

2025-07-09 03:04:10 -04:00 · 2011-07-03 11:40:24 -04:00 · 2011-07-03 11:40:24 -04:00 · 6df5c994d9
commit 6df5c994d9
parent 38e8eb3616
1 changed files with 34 additions and 36 deletions
--- a/src/calibre/gui2/store/stores/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py
@ -6,6 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

+import mimetypes
 import urllib
 from contextlib import closing

@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class GutenbergStore(BasicStoreConfig, StorePlugin):
        
    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://m.gutenberg.org/'
-        ext_url = 'http://gutenberg.org/'
+        url = 'http://gutenberg.org/'
+        
+        if detail_item:
+            detail_item = url_slash_cleaner(url + detail_item)

        if external or self.config.get('open_external', False):
-            if detail_item:
-                ext_url = ext_url + detail_item
-            open_url(QUrl(url_slash_cleaner(ext_url)))
+            open_url(QUrl(detail_item if detail_item else url))
        else:
-            detail_url = None
-            if detail_item:
-                detail_url = url + detail_item
-            d = WebStoreDialog(self.gui, url, parent, detail_url)
+            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
-        # Gutenberg's website does not allow searching both author and title.
-        # Using a google search so we can search on both fields at once.
-        url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query)
+        url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)
        
        br = browser()
        
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
+            for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'):
                if counter <= 0:
                    break

-                url = ''
-                url_a = data.xpath('div[@class="jd"]/a')
-                if url_a:
-                    url_a = url_a[0]
-                    url = url_a.get('href', None)
-                if url:
-                    url = url.split('u=')[-1].split('&')[0]
-                if '/ebooks/' not in url:
-                    continue
-                id = url.split('/')[-1]
+                id = ''.join(data.xpath('./a/@href'))
+                id = id.split('.mobile')[0]
                
-                url_a = html.fromstring(html.tostring(url_a))
-                heading = ''.join(url_a.xpath('//text()'))
-                title, _, author = heading.rpartition('by ')
-                author = author.split('-')[0]
-                price = '$0.00'
+                title = ''.join(data.xpath('.//span[@class="title"]/text()'))
+                author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))
                
                counter -= 1
                
                s = SearchResult()
                s.cover_url = ''
+                
+                s.detail_item = id.strip()
                s.title = title.strip()
                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = '/ebooks/' + id.strip()
+                s.price = '$0.00'
                s.drm = SearchResult.DRM_UNLOCKED
                
                yield s

    def get_details(self, search_result, timeout):
-        url = 'http://m.gutenberg.org/'
+        url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile')
        
        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))
+        with closing(br.open(url, timeout=timeout)) as nf:
+            doc = html.fromstring(nf.read())
+            
+            for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'):
+                type = save_item.get('type')
+                href = save_item.get('href')
+                
+                if type:
+                    ext = mimetypes.guess_extension(type)
+                    if ext:
+                        ext = ext[1:].upper().strip()
+                        search_result.downloads[ext] = href
+
+                search_result.formats = ', '.join(search_result.downloads.keys())
+
        return True