From 3739f8d1de99e3987a4028eb58dece2017c509c0 Mon Sep 17 00:00:00 2001 From: Alex Stanev Date: Wed, 27 Jul 2011 18:10:04 +0300 Subject: [PATCH 1/4] Improved chitanka.info and e-knigi.net --- .../gui2/store/stores/chitanka_plugin.py | 101 +++++++++++------- .../gui2/store/stores/eknigi_plugin.py | 19 +++- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/src/calibre/gui2/store/stores/chitanka_plugin.py b/src/calibre/gui2/store/stores/chitanka_plugin.py index 3e4364d9fa..a1a22797c8 100644 --- a/src/calibre/gui2/store/stores/chitanka_plugin.py +++ b/src/calibre/gui2/store/stores/chitanka_plugin.py @@ -6,7 +6,8 @@ __license__ = 'GPL 3' __copyright__ = '2011, Alex Stanev ' __docformat__ = 'restructuredtext en' -import urllib +import re +import urllib2 from contextlib import closing from lxml import html @@ -39,54 +40,24 @@ class ChitankaStore(BasicStoreConfig, StorePlugin): d.exec_() def search(self, query, max_results=10, timeout=60): + # check for cyrilic symbols before performing search + uquery = unicode(query.strip(), 'utf-8') + reObj = re.search(u'^[а-яА-Я\\d]{4,}[а-яА-Я\\d\\s]*$', uquery) + if not reObj: + return base_url = 'http://chitanka.info' - url = base_url + '/search?q=' + urllib.quote(query) + url = base_url + '/search?q=' + urllib2.quote(query) counter = max_results # search for book title br = browser() - with closing(br.open(url, timeout=timeout)) as f: - f = unicode(f.read(), 'utf-8') - doc = html.fromstring(f) - - for data in doc.xpath('//ul[@class="superlist booklist"]/li'): - if counter <= 0: - break - - id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip() - if not id: - continue - - counter -= 1 - - s = SearchResult() - s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip() - s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip() - s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip() - s.detail_item = id - s.drm = SearchResult.DRM_UNLOCKED - s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '') - s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '') - s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '') - s.formats = 'FB2, EPUB, TXT, SFB' - yield s - - # search for author names - for data in doc.xpath('//ul[@class="superlist"][1]/li'): - author_url = ''.join(data.xpath('.//a[contains(@href,"/person/")]/@href')) - if counter <= 0: - break - - br2 = browser() - with closing(br2.open(base_url + author_url, timeout=timeout)) as f: - if counter <= 0: - break + try: + with closing(br.open(url, timeout=timeout)) as f: f = unicode(f.read(), 'utf-8') - doc2 = html.fromstring(f) + doc = html.fromstring(f) - # search for book title - for data in doc2.xpath('//ul[@class="superlist booklist"]/li'): + for data in doc.xpath('//ul[@class="superlist booklist"]/li'): if counter <= 0: break @@ -107,3 +78,51 @@ class ChitankaStore(BasicStoreConfig, StorePlugin): s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '') s.formats = 'FB2, EPUB, TXT, SFB' yield s + except urllib2.HTTPError, e: + if e.code == 404: + return + else: + raise + + # search for author names + for data in doc.xpath('//ul[@class="superlist"][1]/li/dl/dt'): + author_url = ''.join(data.xpath('.//a[contains(@href,"/person/")]/@href')) + if author_url == '': + continue + if counter <= 0: + break + + br2 = browser() + with closing(br2.open(base_url + author_url, timeout=timeout)) as f: + if counter <= 0: + break + f = unicode(f.read(), 'utf-8') + doc2 = html.fromstring(f) + + # search for book title + for data in doc2.xpath('//ul[@class="superlist booklist"]/li'): + if counter <= 0: + break + + id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip() + if not id: + continue + + title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip() + author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip() + if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1: + continue + + counter -= 1 + + s = SearchResult() + s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip() + s.title = title + s.author = author + s.detail_item = id + s.drm = SearchResult.DRM_UNLOCKED + s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '') + s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '') + s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '') + s.formats = 'FB2, EPUB, TXT, SFB' + yield s diff --git a/src/calibre/gui2/store/stores/eknigi_plugin.py b/src/calibre/gui2/store/stores/eknigi_plugin.py index b2f5f170b6..ee87b771be 100644 --- a/src/calibre/gui2/store/stores/eknigi_plugin.py +++ b/src/calibre/gui2/store/stores/eknigi_plugin.py @@ -6,6 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, Alex Stanev ' __docformat__ = 'restructuredtext en' +import re import random import urllib2 from contextlib import closing @@ -45,8 +46,14 @@ class eKnigiStore(BasicStoreConfig, StorePlugin): d.exec_() def search(self, query, max_results=10, timeout=60): + # check for cyrilic symbols before performing search + uquery = unicode(query.strip(), 'utf-8') + reObj = re.search(u'^[а-яА-Я\\d]{2,}[а-яА-Я\\d\\s]*$', uquery) + if not reObj: + return + base_url = 'http://e-knigi.net' - url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&limitstart=0&limit=' + str(max_results) + '&keyword=' + urllib2.quote(query) + url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&keyword=' + urllib2.quote(query) br = browser() @@ -75,12 +82,18 @@ class eKnigiStore(BasicStoreConfig, StorePlugin): if not id: continue + title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip() + author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '') + + if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1: + continue + counter -= 1 s = SearchResult() s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip() - s.title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip() - s.author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '') + s.title = title + s.author = author s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip() s.detail_item = base_url + id s.drm = SearchResult.DRM_UNLOCKED From 519ee771b04ea615c806df2fd377decb59b2d238 Mon Sep 17 00:00:00 2001 From: Alex Stanev Date: Wed, 27 Jul 2011 21:34:49 +0300 Subject: [PATCH 2/4] Avoid doubleslash and collateral 404 --- src/calibre/gui2/store/stores/smashwords_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/store/stores/smashwords_plugin.py b/src/calibre/gui2/store/stores/smashwords_plugin.py index 7a7e756a05..5195eb1e0a 100644 --- a/src/calibre/gui2/store/stores/smashwords_plugin.py +++ b/src/calibre/gui2/store/stores/smashwords_plugin.py @@ -97,7 +97,7 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin): yield s def get_details(self, search_result, timeout): - url = 'http://www.smashwords.com/' + url = 'http://www.smashwords.com' br = browser() with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: From d5922391324eb12938cbae38582c4d03b715aa1a Mon Sep 17 00:00:00 2001 From: Alex Stanev Date: Thu, 28 Jul 2011 12:07:02 +0300 Subject: [PATCH 3/4] Fix Project Gutenberg store plugin --- .../gui2/store/stores/gutenberg_plugin.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py index ad30f2067d..5cc1005c39 100644 --- a/src/calibre/gui2/store/stores/gutenberg_plugin.py +++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py @@ -22,10 +22,10 @@ from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.web_store_dialog import WebStoreDialog class GutenbergStore(BasicStoreConfig, StorePlugin): - + def open(self, parent=None, detail_item=None, external=False): url = 'http://gutenberg.org/' - + if detail_item: detail_item = url_slash_cleaner(url + detail_item) @@ -39,46 +39,49 @@ class GutenbergStore(BasicStoreConfig, StorePlugin): def search(self, query, max_results=10, timeout=60): url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query) - + br = browser() - + counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'): + for data in doc.xpath('//ol[@class="results"]//li[@class="entry data icon_title"]'): if counter <= 0: break id = ''.join(data.xpath('./a/@href')) id = id.split('.mobile')[0] - + title = ''.join(data.xpath('.//span[@class="title"]/text()')) author = ''.join(data.xpath('.//span[@class="subtitle"]/text()')) - + + if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1: + continue + counter -= 1 - + s = SearchResult() s.cover_url = '' - + s.detail_item = id.strip() s.title = title.strip() s.author = author.strip() s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED - + yield s def get_details(self, search_result, timeout): url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile') - + br = browser() with closing(br.open(url, timeout=timeout)) as nf: doc = html.fromstring(nf.read()) - + for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'): type = save_item.get('type') href = save_item.get('href') - + if type: ext = mimetypes.guess_extension(type) if ext: From caaaa036df4c6b1793bb3fe88861d619b5319241 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 28 Jul 2011 07:16:45 -0400 Subject: [PATCH 4/4] Store: Fixes for Gutenberg plugin. --- src/calibre/gui2/store/stores/gutenberg_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py index ad30f2067d..64d8401f4e 100644 --- a/src/calibre/gui2/store/stores/gutenberg_plugin.py +++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py @@ -45,7 +45,7 @@ class GutenbergStore(BasicStoreConfig, StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'): + for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title") and not(contains(@class, "toplink"))]'): if counter <= 0: break @@ -69,7 +69,7 @@ class GutenbergStore(BasicStoreConfig, StorePlugin): yield s def get_details(self, search_result, timeout): - url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile') + url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item) br = browser() with closing(br.open(url, timeout=timeout)) as nf: