From 6df5c994d9955e7e0de057435a7703c2c378d6c1 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 3 Jul 2011 11:40:24 -0400 Subject: [PATCH] Store: Gutenberg, rewrite plugin to use gutenberg's search and allow for direct downloading. --- .../gui2/store/stores/gutenberg_plugin.py | 70 +++++++++---------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py index 85d1f3966a..ad30f2067d 100644 --- a/src/calibre/gui2/store/stores/gutenberg_plugin.py +++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py @@ -6,6 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import mimetypes import urllib from contextlib import closing @@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog class GutenbergStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): - url = 'http://m.gutenberg.org/' - ext_url = 'http://gutenberg.org/' + url = 'http://gutenberg.org/' + + if detail_item: + detail_item = url_slash_cleaner(url + detail_item) if external or self.config.get('open_external', False): - if detail_item: - ext_url = ext_url + detail_item - open_url(QUrl(url_slash_cleaner(ext_url))) + open_url(QUrl(detail_item if detail_item else url)) else: - detail_url = None - if detail_item: - detail_url = url + detail_item - d = WebStoreDialog(self.gui, url, parent, detail_url) + d = WebStoreDialog(self.gui, url, parent, detail_item) d.setWindowTitle(self.name) d.set_tags(self.config.get('tags', '')) d.exec_() def search(self, query, max_results=10, timeout=60): - # Gutenberg's website does not allow searching both author and title. - # Using a google search so we can search on both fields at once. - url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query) + url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query) br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'): + for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'): if counter <= 0: break + + id = ''.join(data.xpath('./a/@href')) + id = id.split('.mobile')[0] - url = '' - url_a = data.xpath('div[@class="jd"]/a') - if url_a: - url_a = url_a[0] - url = url_a.get('href', None) - if url: - url = url.split('u=')[-1].split('&')[0] - if '/ebooks/' not in url: - continue - id = url.split('/')[-1] - - url_a = html.fromstring(html.tostring(url_a)) - heading = ''.join(url_a.xpath('//text()')) - title, _, author = heading.rpartition('by ') - author = author.split('-')[0] - price = '$0.00' + title = ''.join(data.xpath('.//span[@class="title"]/text()')) + author = ''.join(data.xpath('.//span[@class="subtitle"]/text()')) counter -= 1 s = SearchResult() s.cover_url = '' + + s.detail_item = id.strip() s.title = title.strip() s.author = author.strip() - s.price = price.strip() - s.detail_item = '/ebooks/' + id.strip() + s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED yield s def get_details(self, search_result, timeout): - url = 'http://m.gutenberg.org/' + url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile') br = browser() - with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: - idata = html.fromstring(nf.read()) - search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()')) - return True \ No newline at end of file + with closing(br.open(url, timeout=timeout)) as nf: + doc = html.fromstring(nf.read()) + + for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'): + type = save_item.get('type') + href = save_item.get('href') + + if type: + ext = mimetypes.guess_extension(type) + if ext: + ext = ext[1:].upper().strip() + search_result.downloads[ext] = href + + search_result.formats = ', '.join(search_result.downloads.keys()) + + return True