Add ManyBooks store. Gutenberg search now filters out invalid items earlier and ensures the full id is not truncated.

2025-07-09 03:04:10 -04:00 · 2011-02-26 11:51:01 -05:00 · 2011-02-26 11:51:01 -05:00 · df8347a62b
commit df8347a62b
parent 7d9b44d7de
3 changed files with 74 additions and 5 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1043,7 +1043,8 @@ plugins += [GoogleBooks]
 # Store plugins {{{
 from calibre.gui2.store.amazon_plugin import AmazonKindleStore
 from calibre.gui2.store.gutenberg_plugin import GutenbergStore
+from calibre.gui2.store.manybooks_plugin import ManyBooksStore

-plugins += [AmazonKindleStore, GutenbergStore]
+plugins += [AmazonKindleStore, GutenbergStore, ManyBooksStore]

 # }}}
--- a/src/calibre/gui2/store/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/gutenberg_plugin.py
@ -38,14 +38,22 @@ class GutenbergStore(StorePlugin):
                if counter <= 0:
                    break
                
-                heading = ''.join(data.xpath('div[@class="jd"]/a//text()'))
+                url = ''
+                url_a = data.xpath('div[@class="jd"]/a')
+                if url_a:
+                    url_a = url_a[0]
+                    url = url_a.get('href', None)
+                if url:
+                    url = url.split('u=')[-1].split('&')[0]
+                if '/ebooks/' not in url:
+                    continue
+                id = url.split('/')[-1]
+                
+                heading = ''.join(url_a.xpath('text()'))
                title, _, author = heading.partition('by')
                author = author.split('-')[0]
                price = '$0.00'
                
-                url = ''.join(data.xpath('span[@class="c"]/text()'))
-                id = url.split('/')[-1]
-                
                counter -= 1
                yield ('', title.strip(), author.strip(), price.strip(), '/ebooks/' + id.strip())

--- a/src/calibre/gui2/store/manybooks_plugin.py
+++ b/src/calibre/gui2/store/manybooks_plugin.py
@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import urllib2
+from contextlib import closing
+
+from lxml import html
+
+from calibre import browser
+from calibre.customize import StorePlugin
+
+class ManyBooksStore(StorePlugin):
+    '''Store plugin that browses and searches manybooks.net.'''
+
+    name           = 'ManyBooks'
+    description    = _('The best ebooks at the best price: free!.')
+
+    def open(self, gui, parent=None, start_item=None):
+        '''Open the ManyBooks website in a WebStoreDialog and run it.'''
+        from calibre.gui2.store.web_store_dialog import WebStoreDialog
+        d = WebStoreDialog(gui, 'http://manybooks.net/', parent, start_item)
+        d.setWindowTitle('Ad-free eBooks for your eBook reader')
+        d.exec_()
+
+    def search(self, query, max_results=10, timeout=60):
+        '''
+        Yield up to max_results ('', title, author, '$0.00', '/titles/<id>')
+        tuples parsed from a Google search of manybooks.net for query.
+        '''
+        # ManyBooks website separates results for title and author.
+        # Using a google search so we can search on both fields at once.
+        url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib2.quote(query)
+        br = browser()
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())
+            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
+                if counter <= 0:
+                    break
+
+                links = data.xpath('div[@class="jd"]/a')
+                # A missing link or href gives '' so the filter below skips it.
+                href = links[0].get('href', '') if links else ''
+                # NOTE(review): [:-2] presumably drops a suffix Google adds; confirm.
+                href = href.split('u=')[-1][:-2]
+                if '/titles/' not in href:
+                    continue
+                book_id = href.split('/')[-1]
+
+                heading = ''.join(links[0].xpath('text()'))
+                # Split on ' by ' so a 'by' inside a word stays in the title.
+                title, sep, author = heading.partition(' by ')
+                author = author.split('-')[0]
+                counter -= 1
+                yield ('', title.strip(), author.strip(), '$0.00', '/titles/' + book_id.strip())
+
+