Store: Gutenberg, rewrite plugin to use gutenberg's search and allow for direct downloading.

2025-07-09 03:04:10 -04:00 · 2011-07-03 11:40:24 -04:00 · 2011-07-03 11:40:24 -04:00 · 6df5c994d9
commit 6df5c994d9
parent 38e8eb3616
1 changed files with 34 additions and 36 deletions
--- a/src/calibre/gui2/store/stores/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py
@ -6,6 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import mimetypes
 import urllib
 from contextlib import closing
@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class GutenbergStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://m.gutenberg.org/'
+        url = 'http://gutenberg.org/'
-        ext_url = 'http://gutenberg.org/'
+        
        if detail_item:
            detail_item = url_slash_cleaner(url + detail_item)
        if external or self.config.get('open_external', False):
-            if detail_item:
+            open_url(QUrl(detail_item if detail_item else url))
                ext_url = ext_url + detail_item
            open_url(QUrl(url_slash_cleaner(ext_url)))
        else:
-            detail_url = None
+            d = WebStoreDialog(self.gui, url, parent, detail_item)
            if detail_item:
                detail_url = url + detail_item
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()
    def search(self, query, max_results=10, timeout=60):
-        # Gutenberg's website does not allow searching both author and title.
+        url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)
        # Using a google search so we can search on both fields at once.
        url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query)
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
+            for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('./a/@href'))
                id = id.split('.mobile')[0]
-                url = ''
+                title = ''.join(data.xpath('.//span[@class="title"]/text()'))
-                url_a = data.xpath('div[@class="jd"]/a')
+                author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))
                if url_a:
                    url_a = url_a[0]
                    url = url_a.get('href', None)
                if url:
                    url = url.split('u=')[-1].split('&')[0]
                if '/ebooks/' not in url:
                    continue
                id = url.split('/')[-1]
                url_a = html.fromstring(html.tostring(url_a))
                heading = ''.join(url_a.xpath('//text()'))
                title, _, author = heading.rpartition('by ')
                author = author.split('-')[0]
                price = '$0.00'
                counter -= 1
                s = SearchResult()
                s.cover_url = ''
                s.detail_item = id.strip()
                s.title = title.strip()
                s.author = author.strip()
-                s.price = price.strip()
+                s.price = '$0.00'
                s.detail_item = '/ebooks/' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                yield s
    def get_details(self, search_result, timeout):
-        url = 'http://m.gutenberg.org/'
+        url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile')
        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
+        with closing(br.open(url, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
+            doc = html.fromstring(nf.read())
-            search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))
+            
-        return True
+            for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'):
                type = save_item.get('type')
                href = save_item.get('href')
                if type:
                    ext = mimetypes.guess_extension(type)
                    if ext:
                        ext = ext[1:].upper().strip()
                        search_result.downloads[ext] = href
                search_result.formats = ', '.join(search_result.downloads.keys())
        return True