diff --git a/recipes/hackernews.recipe b/recipes/hackernews.recipe index 9ec26c68eb..8d80008467 100644 --- a/recipes/hackernews.recipe +++ b/recipes/hackernews.recipe @@ -5,7 +5,6 @@ __license__ = 'GPL v3' Hacker News ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag from calibre.ptempfile import PersistentTemporaryFile from urlparse import urlparse import re @@ -46,20 +45,20 @@ class HackerNews(BasicNewsRecipe): self.log('get_hn_content(' + url + ')') soup = self.index_to_soup(url) main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td - + title = self.tag_to_string(main.find('td', 'title')) link = main.find('td', 'title').find('a')['href'] if link.startswith('item?'): link = 'http://news.ycombinator.com/' + link readable_link = link.rpartition('http://')[2].rpartition('https://')[2] subtext = self.tag_to_string(main.find('td', 'subtext')) - + title_content_td = main.find('td', 'title').findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1] title_content = u'' if not title_content_td.find('form'): title_content_td.name ='div' title_content = title_content_td.prettify() - + comments = u'' for td in main.findAll('td', 'default'): comhead = td.find('span', 'comhead') @@ -76,11 +75,11 @@ class HackerNews(BasicNewsRecipe): indent_width = (int(td.parent.find('td').img['width']) * 2) / 3 td['style'] = 'padding-left: ' + str(indent_width) + 'px' comments = comments + com_title + td.prettify() - + body = u'

' + title + u'

' + readable_link + u'
' + subtext + u'

' + title_content + u'
' body = body + comments return u'' + title + u'' + body + '' - + def get_obfuscated_article(self, url): if url.startswith('http://news.ycombinator.com'): content = self.get_hn_content(url) @@ -114,11 +113,11 @@ class HackerNews(BasicNewsRecipe): def populate_article_metadata(self, article, soup, first): article.text_summary = self.prettyify_url(article.url) article.summary = article.text_summary - + # def parse_index(self): # feeds = [] # feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}])) # return feeds - + diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 70aa7c90fa..53c6cba00a 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1277,6 +1277,17 @@ class StoreEKnigiStore(StoreBase): formats = ['EPUB', 'PDF', 'HTML'] affiliate = True +class StoreEscapeMagazineStore(StoreBase): + name = 'EscapeMagazine' + author = u'Tomasz Długosz' + description = u'Książki elektroniczne w formie pliku komputerowego PDF. Zabezpieczone hasłem.' + actual_plugin = 'calibre.gui2.store.stores.escapemagazine_plugin:EscapeMagazineStore' + + drm_free_only = True + headquarters = 'PL' + formats = ['PDF'] + affiliate = True + class StoreFeedbooksStore(StoreBase): name = 'Feedbooks' description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.' @@ -1415,6 +1426,16 @@ class StorePragmaticBookshelfStore(StoreBase): headquarters = 'US' formats = ['EPUB', 'MOBI', 'PDF'] +class StoreRW2010Store(StoreBase): + name = 'RW2010' + description = u'Polski serwis self-publishingowy. Pliki PDF, EPUB i MOBI. Maksymalna cena utworu nie przekracza u nas 10 złotych!' 
+ actual_plugin = 'calibre.gui2.store.stores.rw2010_plugin:RW2010Store' + author = u'Tomasz Długosz' + + drm_free_only = True + headquarters = 'PL' + formats = ['EPUB', 'MOBI', 'PDF'] + class StoreSmashwordsStore(StoreBase): name = 'Smashwords' description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.' @@ -1483,7 +1504,7 @@ class XinXiiStore(StoreBase): name = 'XinXii' description = '' actual_plugin = 'calibre.gui2.store.stores.xinxii_plugin:XinXiiStore' - + headquarters = 'DE' formats = ['EPUB', 'PDF'] @@ -1513,6 +1534,7 @@ plugins += [ StoreEBookShoppeUKStore, StoreEHarlequinStore, StoreEKnigiStore, + StoreEscapeMagazineStore, StoreFeedbooksStore, StoreFoylesUKStore, StoreGandalfStore, @@ -1528,6 +1550,7 @@ plugins += [ StoreOReillyStore, StoreOzonRUStore, StorePragmaticBookshelfStore, + StoreRW2010Store, StoreSmashwordsStore, StoreVirtualoStore, StoreWaterstonesUKStore, diff --git a/src/calibre/gui2/store/stores/bewrite_plugin.py b/src/calibre/gui2/store/stores/bewrite_plugin.py index bfd543db49..b702f15623 100644 --- a/src/calibre/gui2/store/stores/bewrite_plugin.py +++ b/src/calibre/gui2/store/stores/bewrite_plugin.py @@ -80,10 +80,13 @@ class BeWriteStore(BasicStoreConfig, StorePlugin): price = '$' + price.split('$')[-1] search_result.price = price.strip() - cover_img = idata.xpath('//div[@id="content"]//img[1]/@src') + cover_img = idata.xpath('//div[@id="content"]//img/@src') if cover_img: - cover_url = 'http://www.bewrite.net/mm5/' + cover_img[0] - search_result.cover_url = cover_url.strip() + for i in cover_img: + if '00001' in i: + cover_url = 'http://www.bewrite.net/mm5/' + i + search_result.cover_url = cover_url.strip() + break formats = set([]) if idata.xpath('boolean(//div[@id="content"]//td[contains(text(), "ePub")])'): diff --git a/src/calibre/gui2/store/stores/escapemagazine_plugin.py b/src/calibre/gui2/store/stores/escapemagazine_plugin.py new file mode 100644 index 
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz '
__docformat__ = 'restructuredtext en'

import urllib
from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog


class EscapeMagazineStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin that opens and searches www.escapemagazine.pl.

    Search results are scraped from the site's HTML search page; every
    result is reported as an unlocked PDF.
    '''

    _BASE_URL = 'http://www.escapemagazine.pl'
    # Suffix appended to every URL handed to a browser.
    # NOTE(review): '44010' is presumably the affiliate/partner id -- confirm.
    _PID_SUFFIX = '/s/' + '44010'

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front page, or a single book page.

        :param parent: parent widget for the in-application store dialog.
        :param detail_item: absolute URL of a book page as produced by
            :meth:`search`, or ``None`` to open the store front page.
        :param external: if true, use the system browser instead of the
            in-application dialog. The ``open_external`` config option
            forces the same behaviour.
        '''
        url = self._BASE_URL + self._PID_SUFFIX
        # Build the suffixed detail URL once so that both the external and
        # the in-application code paths open exactly the same address
        # (previously only the external path carried the suffix).
        detail_url = detail_item + self._PID_SUFFIX if detail_item else None

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=20, timeout=60):
        '''
        Generate :class:`SearchResult` objects for ``query``.

        :param query: search phrase (byte string or unicode).
        :param max_results: maximum number of results to yield.
        :param timeout: timeout, in seconds, for the search request.
        '''
        # Python 2's urllib.quote_plus cannot quote non-ASCII unicode
        # objects, so hand it UTF-8 encoded bytes.
        if isinstance(query, unicode):
            query = query.encode('utf-8')
        url = self._BASE_URL + '/wyszukiwarka?query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                # Site-relative link to the book page; entries without one
                # cannot be opened later, so they are skipped.
                href = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href')).strip()
                if not href:
                    continue

                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price_now"]/strong/text()')).strip()
                if price:
                    # Only add the currency when a price was actually found,
                    # otherwise the result would show a bare ' zł'.
                    price += ' zł'
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = self._BASE_URL + href
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s
if not price.strip(): price = '$0.00' diff --git a/src/calibre/gui2/store/stores/manybooks_plugin.py b/src/calibre/gui2/store/stores/manybooks_plugin.py index c7dbf0a608..2b06798630 100644 --- a/src/calibre/gui2/store/stores/manybooks_plugin.py +++ b/src/calibre/gui2/store/stores/manybooks_plugin.py @@ -49,7 +49,9 @@ class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore): counter = max_results br = browser() with closing(br.open(url, timeout=timeout)) as f: - doc = etree.fromstring(f.read()) + raw_data = f.read() + raw_data = raw_data.decode('utf-8', 'replace') + doc = etree.fromstring(raw_data) for data in doc.xpath('//*[local-name() = "entry"]'): if counter <= 0: break diff --git a/src/calibre/gui2/store/stores/oreilly_plugin.py b/src/calibre/gui2/store/stores/oreilly_plugin.py index 602a98c68e..b03fdf19e7 100644 --- a/src/calibre/gui2/store/stores/oreilly_plugin.py +++ b/src/calibre/gui2/store/stores/oreilly_plugin.py @@ -45,20 +45,20 @@ class OReillyStore(BasicStoreConfig, StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@id="results"]/div[@class="result"]'): + for data in doc.xpath('//div[@class="result"]'): if counter <= 0: break - full_id = ''.join(data.xpath('.//div[@class="title"]/a/@href')) + full_id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href')) mo = re.search('\d+', full_id) if not mo: continue id = mo.group() - cover_url = ''.join(data.xpath('.//div[@class="bigCover"]//img/@src')) + cover_url = ''.join(data.xpath('./a/img[1]/@src')) - title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) - author = ''.join(data.xpath('.//div[@class="author"]/text()')) + title = ''.join(data.xpath('./div[@class="book_text"]/p[@class="title"]/a/text()')) + author = ''.join(data.xpath('./div[@class="book_text"]/p[@class="note"][1]/text()')) author = author.split('By ')[-1].strip() # Get the detail here because we need to get the 
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz '
__docformat__ = 'restructuredtext en'

import re
import urllib
from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog


class RW2010Store(BasicStoreConfig, StorePlugin):
    '''
    Store plugin that opens and searches www.rw2010.pl.

    The search posts a form to the site, then fetches the detail page of
    every hit to read its cover, author, title, formats and price.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front page, or a single book page.

        :param parent: parent widget for the in-application store dialog.
        :param detail_item: URL of a book page as produced by
            :meth:`search`, or ``None`` to open the store front page.
        :param external: if true, use the system browser instead of the
            in-application dialog. The ``open_external`` config option
            forces the same behaviour.
        '''
        url = 'http://www.rw2010.pl/'

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Generate :class:`SearchResult` objects for ``query``.

        :param query: search phrase (byte string or unicode).
        :param max_results: maximum number of results to yield.
        :param timeout: timeout, in seconds, for the search request; each
            detail-page request gets a quarter of it.
        '''
        # Python 2's urllib.urlencode calls str() on every value, which
        # fails for non-ASCII unicode, so hand it UTF-8 encoded bytes.
        if isinstance(query, unicode):
            query = query.encode('utf-8')
        url = 'http://www.rw2010.pl/go.live.php/?launch_macro=catalogue-search-rd'
        values = {
            'fkeyword': query,
            'file_type': '',
        }

        br = browser()

        counter = max_results
        with closing(br.open(url, data=urllib.urlencode(values), timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="ProductDetail"]'):
                if counter <= 0:
                    break

                # Strip once so the URL that is fetched and the URL that is
                # stored as detail_item are the same string.
                detail_url = ''.join(data.xpath('.//div[@class="img"]/a/@href')).strip()
                if not detail_url:
                    continue

                # The result list does not carry all the fields, so each hit
                # costs one extra request for its detail page.
                with closing(br.open(detail_url, timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())

                cover_url = ''.join(idata.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
                author = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
                title = ''.join(idata.xpath('//div[@class="boxb"]/h2[1]/text()'))
                # Drop a "(#...)" marker from the title. The match stops at
                # the first ')' so later parenthesised text is preserved.
                title = re.sub(r'\(#[^)]+\)', '', title)
                formats = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
                price = ''.join(idata.xpath('//div[@class="price-box"]/span/text()')).strip()
                if price:
                    # Only decorate a price that was actually found,
                    # otherwise the result would show a bare ',00 zł'.
                    price += ',00 zł'

                counter -= 1

                s = SearchResult()
                if cover_url:
                    # Cover paths are joined onto the site root; skip the
                    # assignment when no cover was found instead of
                    # pointing at the site root itself.
                    s.cover_url = 'http://www.rw2010.pl/' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                # Undo the percent-encoding of '=' in the link.
                s.detail_item = detail_url.replace('%3D', '=')
                s.drm = SearchResult.DRM_UNLOCKED
                # NOTE(review): drops the last two characters, presumably a
                # trailing ', ' separator in the page text -- confirm
                # against the live markup.
                s.formats = formats[0:-2].upper()

                yield s