Get Books: Add EscapeMagazine.pl and RW2010.pl stores

This commit is contained in:
Kovid Goyal 2011-08-29 17:03:44 -06:00
commit 3bd6dc3359
8 changed files with 196 additions and 20 deletions

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
Hacker News Hacker News
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse from urlparse import urlparse
import re import re
@ -46,20 +45,20 @@ class HackerNews(BasicNewsRecipe):
self.log('get_hn_content(' + url + ')') self.log('get_hn_content(' + url + ')')
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td
title = self.tag_to_string(main.find('td', 'title')) title = self.tag_to_string(main.find('td', 'title'))
link = main.find('td', 'title').find('a')['href'] link = main.find('td', 'title').find('a')['href']
if link.startswith('item?'): if link.startswith('item?'):
link = 'http://news.ycombinator.com/' + link link = 'http://news.ycombinator.com/' + link
readable_link = link.rpartition('http://')[2].rpartition('https://')[2] readable_link = link.rpartition('http://')[2].rpartition('https://')[2]
subtext = self.tag_to_string(main.find('td', 'subtext')) subtext = self.tag_to_string(main.find('td', 'subtext'))
title_content_td = main.find('td', 'title').findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1] title_content_td = main.find('td', 'title').findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1]
title_content = u'' title_content = u''
if not title_content_td.find('form'): if not title_content_td.find('form'):
title_content_td.name ='div' title_content_td.name ='div'
title_content = title_content_td.prettify() title_content = title_content_td.prettify()
comments = u'' comments = u''
for td in main.findAll('td', 'default'): for td in main.findAll('td', 'default'):
comhead = td.find('span', 'comhead') comhead = td.find('span', 'comhead')
@ -76,11 +75,11 @@ class HackerNews(BasicNewsRecipe):
indent_width = (int(td.parent.find('td').img['width']) * 2) / 3 indent_width = (int(td.parent.find('td').img['width']) * 2) / 3
td['style'] = 'padding-left: ' + str(indent_width) + 'px' td['style'] = 'padding-left: ' + str(indent_width) + 'px'
comments = comments + com_title + td.prettify() comments = comments + com_title + td.prettify()
body = u'<h3>' + title + u'</h3><p><a href="' + link + u'">' + readable_link + u'</a><br/><strong>' + subtext + u'</strong></p>' + title_content + u'<br/>' body = u'<h3>' + title + u'</h3><p><a href="' + link + u'">' + readable_link + u'</a><br/><strong>' + subtext + u'</strong></p>' + title_content + u'<br/>'
body = body + comments body = body + comments
return u'<html><title>' + title + u'</title><body>' + body + '</body></html>' return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
def get_obfuscated_article(self, url): def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'): if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url) content = self.get_hn_content(url)
@ -114,11 +113,11 @@ class HackerNews(BasicNewsRecipe):
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
article.text_summary = self.prettyify_url(article.url) article.text_summary = self.prettyify_url(article.url)
article.summary = article.text_summary article.summary = article.text_summary
# def parse_index(self): # def parse_index(self):
# feeds = [] # feeds = []
# feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}])) # feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}]))
# return feeds # return feeds

View File

@ -1277,6 +1277,17 @@ class StoreEKnigiStore(StoreBase):
formats = ['EPUB', 'PDF', 'HTML'] formats = ['EPUB', 'PDF', 'HTML']
affiliate = True affiliate = True
class StoreEscapeMagazineStore(StoreBase):
    # "Get Books" registry entry for the EscapeMagazine.pl store.
    # The real plugin lives in the module named by actual_plugin and is
    # loaded lazily by calibre's store framework.
    name          = 'EscapeMagazine'
    author        = u'Tomasz Długosz'
    description   = u'Książki elektroniczne w formie pliku komputerowego PDF. Zabezpieczone hasłem.'
    actual_plugin = 'calibre.gui2.store.stores.escapemagazine_plugin:EscapeMagazineStore'

    drm_free_only = True
    headquarters  = 'PL'
    formats       = ['PDF']
    affiliate     = True
class StoreFeedbooksStore(StoreBase): class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks' name = 'Feedbooks'
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.' description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
@ -1415,6 +1426,16 @@ class StorePragmaticBookshelfStore(StoreBase):
headquarters = 'US' headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF'] formats = ['EPUB', 'MOBI', 'PDF']
class StoreRW2010Store(StoreBase):
    # "Get Books" registry entry for the RW2010.pl self-publishing store.
    # Only declarative metadata lives here; the implementation is loaded
    # on demand from the module referenced by actual_plugin.
    name          = 'RW2010'
    author        = u'Tomasz Długosz'
    description   = u'Polski serwis self-publishingowy. Pliki PDF, EPUB i MOBI. Maksymalna cena utworu nie przekracza u nas 10 złotych!'
    actual_plugin = 'calibre.gui2.store.stores.rw2010_plugin:RW2010Store'

    drm_free_only = True
    headquarters  = 'PL'
    formats       = ['EPUB', 'MOBI', 'PDF']
class StoreSmashwordsStore(StoreBase): class StoreSmashwordsStore(StoreBase):
name = 'Smashwords' name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.' description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
@ -1483,7 +1504,7 @@ class XinXiiStore(StoreBase):
name = 'XinXii' name = 'XinXii'
description = '' description = ''
actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore' actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore'
headquarters = 'DE' headquarters = 'DE'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1513,6 +1534,7 @@ plugins += [
StoreEBookShoppeUKStore, StoreEBookShoppeUKStore,
StoreEHarlequinStore, StoreEHarlequinStore,
StoreEKnigiStore, StoreEKnigiStore,
StoreEscapeMagazineStore,
StoreFeedbooksStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreFoylesUKStore,
StoreGandalfStore, StoreGandalfStore,
@ -1528,6 +1550,7 @@ plugins += [
StoreOReillyStore, StoreOReillyStore,
StoreOzonRUStore, StoreOzonRUStore,
StorePragmaticBookshelfStore, StorePragmaticBookshelfStore,
StoreRW2010Store,
StoreSmashwordsStore, StoreSmashwordsStore,
StoreVirtualoStore, StoreVirtualoStore,
StoreWaterstonesUKStore, StoreWaterstonesUKStore,

View File

@ -80,10 +80,13 @@ class BeWriteStore(BasicStoreConfig, StorePlugin):
price = '$' + price.split('$')[-1] price = '$' + price.split('$')[-1]
search_result.price = price.strip() search_result.price = price.strip()
cover_img = idata.xpath('//div[@id="content"]//img[1]/@src') cover_img = idata.xpath('//div[@id="content"]//img/@src')
if cover_img: if cover_img:
cover_url = 'http://www.bewrite.net/mm5/' + cover_img[0] for i in cover_img:
search_result.cover_url = cover_url.strip() if '00001' in i:
cover_url = 'http://www.bewrite.net/mm5/' + i
search_result.cover_url = cover_url.strip()
break
formats = set([]) formats = set([])
if idata.xpath('boolean(//div[@id="content"]//td[contains(text(), "ePub")])'): if idata.xpath('boolean(//div[@id="content"]//td[contains(text(), "ePub")])'):

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EscapeMagazineStore(BasicStoreConfig, StorePlugin):
    """Store plugin for EscapeMagazine.pl (Polish store selling PDF ebooks)."""

    def open(self, parent=None, detail_item=None, external=False):
        """Open the store, either in the system browser or in calibre's
        embedded web store dialog.

        :param detail_item: URL of a specific book page to open, or None
            for the store front page.
        :param external: force opening in the external browser.
        """
        # '44010' is appended to every URL — presumably a partner/affiliate
        # id so the store credits the referral (TODO confirm with the store).
        pid = '44010'
        url = 'http://www.escapemagazine.pl/s/' + pid

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item + '/s/' + pid if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=20, timeout=60):
        """Yield up to ``max_results`` :class:`SearchResult` objects for
        ``query``, scraped from the store's search-result page."""
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                # Relative URL of the book's detail page; doubles as the id.
                # Renamed from ``id`` to avoid shadowing the builtin.
                book_href = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not book_href:
                    continue

                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                # The original appended a pointless empty string here; dropped.
                price = ''.join(data.xpath('.//span[@class="price_now"]/strong/text()'))
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + book_href.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s

View File

@ -68,8 +68,8 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
continue continue
title = ''.join(data.xpath('.//h3/a//text()')) title = ''.join(data.xpath('.//h3/a//text()'))
authors = data.xpath('.//span[@class="gl"]//a//text()') authors = data.xpath('.//span[@class="f"]//a//text()')
if authors[-1].strip().lower() in ('preview', 'read'): if authors and authors[-1].strip().lower() in ('preview', 'read'):
authors = authors[:-1] authors = authors[:-1]
else: else:
continue continue
@ -98,6 +98,8 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
if not price.strip(): if not price.strip():
price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()')) price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()'))
price = price.split('-')[-1] price = price.split('-')[-1]
if 'view' in price.lower():
price = 'Unknown'
# No price set for this book. # No price set for this book.
if not price.strip(): if not price.strip():
price = '$0.00' price = '$0.00'

View File

@ -49,7 +49,9 @@ class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore):
counter = max_results counter = max_results
br = browser() br = browser()
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = etree.fromstring(f.read()) raw_data = f.read()
raw_data = raw_data.decode('utf-8', 'replace')
doc = etree.fromstring(raw_data)
for data in doc.xpath('//*[local-name() = "entry"]'): for data in doc.xpath('//*[local-name() = "entry"]'):
if counter <= 0: if counter <= 0:
break break

View File

@ -45,20 +45,20 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//div[@id="results"]/div[@class="result"]'): for data in doc.xpath('//div[@class="result"]'):
if counter <= 0: if counter <= 0:
break break
full_id = ''.join(data.xpath('.//div[@class="title"]/a/@href')) full_id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href'))
mo = re.search('\d+', full_id) mo = re.search('\d+', full_id)
if not mo: if not mo:
continue continue
id = mo.group() id = mo.group()
cover_url = ''.join(data.xpath('.//div[@class="bigCover"]//img/@src')) cover_url = ''.join(data.xpath('./a/img[1]/@src'))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) title = ''.join(data.xpath('./div[@class="book_text"]/p[@class="title"]/a/text()'))
author = ''.join(data.xpath('.//div[@class="author"]/text()')) author = ''.join(data.xpath('./div[@class="book_text"]/p[@class="note"][1]/text()'))
author = author.split('By ')[-1].strip() author = author.split('By ')[-1].strip()
# Get the detail here because we need to get the ebook id for the detail_item. # Get the detail here because we need to get the ebook id for the detail_item.

View File

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class RW2010Store(BasicStoreConfig, StorePlugin):
    """Store plugin for RW2010.pl (Polish self-publishing ebook store)."""

    def open(self, parent=None, detail_item=None, external=False):
        """Show the store front page (or ``detail_item``) in either the
        external browser or calibre's embedded store dialog."""
        url = 'http://www.rw2010.pl/'

        use_external = external or self.config.get('open_external', False)
        if use_external:
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            dialog = WebStoreDialog(self.gui, url, parent, detail_item)
            dialog.setWindowTitle(self.name)
            dialog.set_tags(self.config.get('tags', ''))
            dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        """Yield up to ``max_results`` :class:`SearchResult` objects for
        ``query``.  Each hit requires a second request to the book's
        detail page to collect author, formats and price."""
        url = 'http://www.rw2010.pl/go.live.php/?launch_macro=catalogue-search-rd'
        form_fields = {
            'fkeyword': query,
            'file_type': '',
        }

        br = browser()

        remaining = max_results
        with closing(br.open(url, data=urllib.urlencode(form_fields), timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for product in doc.xpath('//div[@class="ProductDetail"]'):
                if remaining <= 0:
                    break

                # Link to the detail page; doubles as the book identifier.
                detail_href = ''.join(product.xpath('.//div[@class="img"]/a/@href'))
                if not detail_href:
                    continue

                # Fetch the detail page (shorter timeout) for the metadata
                # that the search listing itself does not carry.
                with closing(br.open(detail_href.strip(), timeout=timeout / 4)) as detail_f:
                    detail_doc = html.fromstring(detail_f.read())
                    cover_url = ''.join(detail_doc.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
                    author = ''.join(detail_doc.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
                    title = ''.join(detail_doc.xpath('//div[@class="boxb"]/h2[1]/text()'))
                    # Strip a trailing "(#…)" series/sequence marker from the title.
                    title = re.sub(r'\(#.+\)', '', title)
                    formats = ''.join(detail_doc.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
                    price = ''.join(detail_doc.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł'

                remaining -= 1

                result = SearchResult()
                result.cover_url = 'http://www.rw2010.pl/' + cover_url
                result.title = title.strip()
                result.author = author.strip()
                result.price = price
                # Undo the URL-encoding of '=' in the detail link.
                result.detail_item = detail_href.replace('%3D', '=')
                result.drm = SearchResult.DRM_UNLOCKED
                # Drop the trailing separator from the formats string.
                result.formats = formats[:-2].upper()

                yield result