Get Books: Add EscapeMagazine.pl and RW2010.pl stores

This commit is contained in:
Kovid Goyal 2011-08-29 17:03:44 -06:00
commit 3bd6dc3359
8 changed files with 196 additions and 20 deletions

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
Hacker News Hacker News
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse from urlparse import urlparse
import re import re
@ -46,20 +45,20 @@ class HackerNews(BasicNewsRecipe):
self.log('get_hn_content(' + url + ')') self.log('get_hn_content(' + url + ')')
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td
title = self.tag_to_string(main.find('td', 'title')) title = self.tag_to_string(main.find('td', 'title'))
link = main.find('td', 'title').find('a')['href'] link = main.find('td', 'title').find('a')['href']
if link.startswith('item?'): if link.startswith('item?'):
link = 'http://news.ycombinator.com/' + link link = 'http://news.ycombinator.com/' + link
readable_link = link.rpartition('http://')[2].rpartition('https://')[2] readable_link = link.rpartition('http://')[2].rpartition('https://')[2]
subtext = self.tag_to_string(main.find('td', 'subtext')) subtext = self.tag_to_string(main.find('td', 'subtext'))
title_content_td = main.find('td', 'title').findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1] title_content_td = main.find('td', 'title').findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1]
title_content = u'' title_content = u''
if not title_content_td.find('form'): if not title_content_td.find('form'):
title_content_td.name ='div' title_content_td.name ='div'
title_content = title_content_td.prettify() title_content = title_content_td.prettify()
comments = u'' comments = u''
for td in main.findAll('td', 'default'): for td in main.findAll('td', 'default'):
comhead = td.find('span', 'comhead') comhead = td.find('span', 'comhead')
@ -76,11 +75,11 @@ class HackerNews(BasicNewsRecipe):
indent_width = (int(td.parent.find('td').img['width']) * 2) / 3 indent_width = (int(td.parent.find('td').img['width']) * 2) / 3
td['style'] = 'padding-left: ' + str(indent_width) + 'px' td['style'] = 'padding-left: ' + str(indent_width) + 'px'
comments = comments + com_title + td.prettify() comments = comments + com_title + td.prettify()
body = u'<h3>' + title + u'</h3><p><a href="' + link + u'">' + readable_link + u'</a><br/><strong>' + subtext + u'</strong></p>' + title_content + u'<br/>' body = u'<h3>' + title + u'</h3><p><a href="' + link + u'">' + readable_link + u'</a><br/><strong>' + subtext + u'</strong></p>' + title_content + u'<br/>'
body = body + comments body = body + comments
return u'<html><title>' + title + u'</title><body>' + body + '</body></html>' return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
def get_obfuscated_article(self, url): def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'): if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url) content = self.get_hn_content(url)
@ -114,11 +113,11 @@ class HackerNews(BasicNewsRecipe):
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
article.text_summary = self.prettyify_url(article.url) article.text_summary = self.prettyify_url(article.url)
article.summary = article.text_summary article.summary = article.text_summary
# def parse_index(self): # def parse_index(self):
# feeds = [] # feeds = []
# feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}])) # feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}]))
# return feeds # return feeds

View File

@ -1277,6 +1277,17 @@ class StoreEKnigiStore(StoreBase):
formats = ['EPUB', 'PDF', 'HTML'] formats = ['EPUB', 'PDF', 'HTML']
affiliate = True affiliate = True
class StoreEscapeMagazineStore(StoreBase):
    # "Get Books" registry entry for the EscapeMagazine.pl store.
    # The real plugin lives in the module named by actual_plugin and is
    # loaded lazily by calibre's store framework.
    name          = 'EscapeMagazine'
    author        = u'Tomasz Długosz'
    description   = u'Książki elektroniczne w formie pliku komputerowego PDF. Zabezpieczone hasłem.'
    actual_plugin = 'calibre.gui2.store.stores.escapemagazine_plugin:EscapeMagazineStore'

    drm_free_only = True
    headquarters  = 'PL'
    formats       = ['PDF']
    affiliate     = True
class StoreFeedbooksStore(StoreBase): class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks' name = 'Feedbooks'
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.' description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
@ -1415,6 +1426,16 @@ class StorePragmaticBookshelfStore(StoreBase):
headquarters = 'US' headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF'] formats = ['EPUB', 'MOBI', 'PDF']
class StoreRW2010Store(StoreBase):
    # "Get Books" registry entry for the RW2010.pl self-publishing store.
    # Only declarative metadata lives here; the implementation is loaded
    # on demand from the module referenced by actual_plugin.
    name          = 'RW2010'
    author        = u'Tomasz Długosz'
    description   = u'Polski serwis self-publishingowy. Pliki PDF, EPUB i MOBI. Maksymalna cena utworu nie przekracza u nas 10 złotych!'
    actual_plugin = 'calibre.gui2.store.stores.rw2010_plugin:RW2010Store'

    drm_free_only = True
    headquarters  = 'PL'
    formats       = ['EPUB', 'MOBI', 'PDF']
class StoreSmashwordsStore(StoreBase): class StoreSmashwordsStore(StoreBase):
name = 'Smashwords' name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.' description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
@ -1483,7 +1504,7 @@ class XinXiiStore(StoreBase):
name = 'XinXii' name = 'XinXii'
description = '' description = ''
actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore' actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore'
headquarters = 'DE' headquarters = 'DE'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1513,6 +1534,7 @@ plugins += [
StoreEBookShoppeUKStore, StoreEBookShoppeUKStore,
StoreEHarlequinStore, StoreEHarlequinStore,
StoreEKnigiStore, StoreEKnigiStore,
StoreEscapeMagazineStore,
StoreFeedbooksStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreFoylesUKStore,
StoreGandalfStore, StoreGandalfStore,
@ -1528,6 +1550,7 @@ plugins += [
StoreOReillyStore, StoreOReillyStore,
StoreOzonRUStore, StoreOzonRUStore,
StorePragmaticBookshelfStore, StorePragmaticBookshelfStore,
StoreRW2010Store,
StoreSmashwordsStore, StoreSmashwordsStore,
StoreVirtualoStore, StoreVirtualoStore,
StoreWaterstonesUKStore, StoreWaterstonesUKStore,

View File

@ -80,10 +80,13 @@ class BeWriteStore(BasicStoreConfig, StorePlugin):
price = '$' + price.split('$')[-1] price = '$' + price.split('$')[-1]
search_result.price = price.strip() search_result.price = price.strip()
cover_img = idata.xpath('//div[@id="content"]//img[1]/@src') cover_img = idata.xpath('//div[@id="content"]//img/@src')
if cover_img: if cover_img:
cover_url = 'http://www.bewrite.net/mm5/' + cover_img[0] for i in cover_img:
search_result.cover_url = cover_url.strip() if '00001' in i:
cover_url = 'http://www.bewrite.net/mm5/' + i
search_result.cover_url = cover_url.strip()
break
formats = set([]) formats = set([])
if idata.xpath('boolean(//div[@id="content"]//td[contains(text(), "ePub")])'): if idata.xpath('boolean(//div[@id="content"]//td[contains(text(), "ePub")])'):

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EscapeMagazineStore(BasicStoreConfig, StorePlugin):
    """Store plugin for EscapeMagazine.pl (Polish store selling PDF ebooks)."""

    def open(self, parent=None, detail_item=None, external=False):
        """Open the store, either in the system browser or in calibre's
        embedded web store dialog.

        :param detail_item: URL of a specific book page to open, or None
            for the store front page.
        :param external: force opening in the external browser.
        """
        # '44010' is appended to every URL — presumably a partner/affiliate
        # id so the store credits the referral (TODO confirm with the store).
        pid = '44010'
        url = 'http://www.escapemagazine.pl/s/' + pid

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item + '/s/' + pid if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=20, timeout=60):
        """Yield up to ``max_results`` :class:`SearchResult` objects for
        ``query``, scraped from the store's search-result page."""
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                # Relative URL of the book's detail page; doubles as the id.
                # Renamed from ``id`` to avoid shadowing the builtin.
                book_href = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not book_href:
                    continue

                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                # The original appended a pointless empty string here; dropped.
                price = ''.join(data.xpath('.//span[@class="price_now"]/strong/text()'))
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + book_href.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s

View File

@ -68,8 +68,8 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
continue continue
title = ''.join(data.xpath('.//h3/a//text()')) title = ''.join(data.xpath('.//h3/a//text()'))
authors = data.xpath('.//span[@class="gl"]//a//text()') authors = data.xpath('.//span[@class="f"]//a//text()')
if authors[-1].strip().lower() in ('preview', 'read'): if authors and authors[-1].strip().lower() in ('preview', 'read'):
authors = authors[:-1] authors = authors[:-1]
else: else:
continue continue
@ -98,6 +98,8 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
if not price.strip(): if not price.strip():
price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()')) price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()'))
price = price.split('-')[-1] price = price.split('-')[-1]
if 'view' in price.lower():
price = 'Unknown'
# No price set for this book. # No price set for this book.
if not price.strip(): if not price.strip():
price = '$0.00' price = '$0.00'

View File

@ -49,7 +49,9 @@ class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore):
counter = max_results counter = max_results
br = browser() br = browser()
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = etree.fromstring(f.read()) raw_data = f.read()
raw_data = raw_data.decode('utf-8', 'replace')
doc = etree.fromstring(raw_data)
for data in doc.xpath('//*[local-name() = "entry"]'): for data in doc.xpath('//*[local-name() = "entry"]'):
if counter <= 0: if counter <= 0:
break break

View File

@ -45,20 +45,20 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//div[@id="results"]/div[@class="result"]'): for data in doc.xpath('//div[@class="result"]'):
if counter <= 0: if counter <= 0:
break break
full_id = ''.join(data.xpath('.//div[@class="title"]/a/@href')) full_id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href'))
mo = re.search('\d+', full_id) mo = re.search('\d+', full_id)
if not mo: if not mo:
continue continue
id = mo.group() id = mo.group()
cover_url = ''.join(data.xpath('.//div[@class="bigCover"]//img/@src')) cover_url = ''.join(data.xpath('./a/img[1]/@src'))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) title = ''.join(data.xpath('./div[@class="book_text"]/p[@class="title"]/a/text()'))
author = ''.join(data.xpath('.//div[@class="author"]/text()')) author = ''.join(data.xpath('./div[@class="book_text"]/p[@class="note"][1]/text()'))
author = author.split('By ')[-1].strip() author = author.split('By ')[-1].strip()
# Get the detail here because we need to get the ebook id for the detail_item. # Get the detail here because we need to get the ebook id for the detail_item.

View File

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class RW2010Store(BasicStoreConfig, StorePlugin):
    """Store plugin for RW2010.pl (Polish self-publishing ebook store)."""

    def open(self, parent=None, detail_item=None, external=False):
        """Show the store front page (or ``detail_item``) in either the
        external browser or calibre's embedded store dialog."""
        url = 'http://www.rw2010.pl/'

        use_external = external or self.config.get('open_external', False)
        if use_external:
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            dialog = WebStoreDialog(self.gui, url, parent, detail_item)
            dialog.setWindowTitle(self.name)
            dialog.set_tags(self.config.get('tags', ''))
            dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        """Yield up to ``max_results`` :class:`SearchResult` objects for
        ``query``.  Each hit requires a second request to the book's
        detail page to collect author, formats and price."""
        url = 'http://www.rw2010.pl/go.live.php/?launch_macro=catalogue-search-rd'
        form_fields = {
            'fkeyword': query,
            'file_type': '',
        }

        br = browser()

        remaining = max_results
        with closing(br.open(url, data=urllib.urlencode(form_fields), timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for product in doc.xpath('//div[@class="ProductDetail"]'):
                if remaining <= 0:
                    break

                # Link to the detail page; doubles as the book identifier.
                detail_href = ''.join(product.xpath('.//div[@class="img"]/a/@href'))
                if not detail_href:
                    continue

                # Fetch the detail page (shorter timeout) for the metadata
                # that the search listing itself does not carry.
                with closing(br.open(detail_href.strip(), timeout=timeout / 4)) as detail_f:
                    detail_doc = html.fromstring(detail_f.read())
                    cover_url = ''.join(detail_doc.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
                    author = ''.join(detail_doc.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
                    title = ''.join(detail_doc.xpath('//div[@class="boxb"]/h2[1]/text()'))
                    # Strip a trailing "(#…)" series/sequence marker from the title.
                    title = re.sub(r'\(#.+\)', '', title)
                    formats = ''.join(detail_doc.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
                    price = ''.join(detail_doc.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł'

                remaining -= 1

                result = SearchResult()
                result.cover_url = 'http://www.rw2010.pl/' + cover_url
                result.title = title.strip()
                result.author = author.strip()
                result.price = price
                # Undo the URL-encoding of '=' in the detail link.
                result.detail_item = detail_href.replace('%3D', '=')
                result.drm = SearchResult.DRM_UNLOCKED
                # Drop the trailing separator from the formats string.
                result.formats = formats[:-2].upper()

                yield result