From 467f0be76fa62b16c526119a4da15044bda7d14b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 10 Jun 2012 12:12:26 +0200 Subject: [PATCH 1/2] draft version of improved virtualo plugin --- .../gui2/store/stores/virtualo_plugin.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py index e1d86b4d8b..f88ed9e96b 100644 --- a/src/calibre/gui2/store/stores/virtualo_plugin.py +++ b/src/calibre/gui2/store/stores/virtualo_plugin.py @@ -40,29 +40,25 @@ class VirtualoStore(BasicStoreConfig, StorePlugin): url = 'http://virtualo.pl/?q=' + urllib.quote(query) + '&f=format_id:4,6,3' br = browser() - drm_pattern = re.compile("ADE") + no_drm_pattern = re.compile("Znak wodny") counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@id="product_list"]/div/div[@class="column"]'): + for data in doc.xpath('//div[@id="content"]//div[@class="list_box list_box_border"]'): if counter <= 0: break - id = ''.join(data.xpath('.//table/tr[1]/td[1]/a/@href')) + id = ''.join(data.xpath('.//div[@class="list_middle_left"]//a/@href')) if not id: continue price = ''.join(data.xpath('.//span[@class="price"]/text() | .//span[@class="price abbr"]/text()')) - cover_url = ''.join(data.xpath('.//table/tr[1]/td[1]/a/img/@src')) - title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) - title = re.sub(r'\ WM', '', title) - author = ', '.join(data.xpath('.//div[@class="authors"]/a/text()')) - formats = ', '.join(data.xpath('.//span[@class="format"]/a/text()')) - formats = re.sub(r'(, )?ONLINE(, )?', '', formats) - drm = drm_pattern.search(formats) - formats = re.sub(r'(, )?ADE(, )?', '', formats) - formats = re.sub(r'\ WM', '', formats) + cover_url = ''.join(data.xpath('.//div[@class="list_middle_left"]//a/img/@src')) + title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()')) + author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()')) + formats = ', '#.join(data.xpath('.//div[2]/div[3]//div[@class="list_vertical_wrap_middle"]/img/@src()')) + drm = not no_drm_pattern.search(formats) counter -= 1 From b514803f43cb29a665651621bd58ab78967e4243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 10 Jun 2012 15:57:22 +0200 Subject: [PATCH 2/2] virtualo plugin catches up with website upgrade --- src/calibre/gui2/store/stores/virtualo_plugin.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py index f88ed9e96b..e6b60fbe91 100644 --- a/src/calibre/gui2/store/stores/virtualo_plugin.py +++ b/src/calibre/gui2/store/stores/virtualo_plugin.py @@ -57,18 +57,18 @@ class VirtualoStore(BasicStoreConfig, StorePlugin): cover_url = ''.join(data.xpath('.//div[@class="list_middle_left"]//a/img/@src')) title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()')) author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()')) - formats = ', '#.join(data.xpath('.//div[2]/div[3]//div[@class="list_vertical_wrap_middle"]/img/@src()')) - drm = not no_drm_pattern.search(formats) + formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//img/@src')] + nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]/div/div/text()'))) counter -= 1 s = SearchResult() s.cover_url = cover_url.split('.jpg')[0] + '.jpg' - s.title = title.strip() + ' ' + formats + s.title = title.strip() s.author = author.strip() s.price = price + ' zł' s.detail_item = 'http://virtualo.pl' + id.strip().split('http://')[0] - s.formats = formats.upper().strip() - s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED + s.formats = ', '.join(formats).upper() + s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_UNKNOWN yield s