mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg.
This commit is contained in:
commit
299d477f75
Binary file not shown.
@ -1387,15 +1387,6 @@ class StoreOpenBooksStore(StoreBase):
|
|||||||
drm_free_only = True
|
drm_free_only = True
|
||||||
headquarters = 'US'
|
headquarters = 'US'
|
||||||
|
|
||||||
class StoreOpenLibraryStore(StoreBase):
|
|
||||||
name = 'Open Library'
|
|
||||||
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
|
|
||||||
actual_plugin = 'calibre.gui2.store.stores.open_library_plugin:OpenLibraryStore'
|
|
||||||
|
|
||||||
drm_free_only = True
|
|
||||||
headquarters = 'US'
|
|
||||||
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
|
|
||||||
|
|
||||||
class StoreOReillyStore(StoreBase):
|
class StoreOReillyStore(StoreBase):
|
||||||
name = 'OReilly'
|
name = 'OReilly'
|
||||||
description = u'Programming and tech ebooks from OReilly.'
|
description = u'Programming and tech ebooks from OReilly.'
|
||||||
@ -1514,7 +1505,6 @@ plugins += [
|
|||||||
StoreMobileReadStore,
|
StoreMobileReadStore,
|
||||||
StoreNextoStore,
|
StoreNextoStore,
|
||||||
StoreOpenBooksStore,
|
StoreOpenBooksStore,
|
||||||
StoreOpenLibraryStore,
|
|
||||||
StoreOReillyStore,
|
StoreOReillyStore,
|
||||||
StorePragmaticBookshelfStore,
|
StorePragmaticBookshelfStore,
|
||||||
StoreSmashwordsStore,
|
StoreSmashwordsStore,
|
||||||
|
@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
@ -22,7 +21,7 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|||||||
from calibre.utils.opensearch.description import Description
|
from calibre.utils.opensearch.description import Description
|
||||||
from calibre.utils.opensearch.query import Query
|
from calibre.utils.opensearch.query import Query
|
||||||
|
|
||||||
class OpenSearchStore(StorePlugin):
|
class OpenSearchOPDSStore(StorePlugin):
|
||||||
|
|
||||||
open_search_url = ''
|
open_search_url = ''
|
||||||
web_url = ''
|
web_url = ''
|
||||||
@ -50,7 +49,7 @@ class OpenSearchStore(StorePlugin):
|
|||||||
oquery = Query(url_template)
|
oquery = Query(url_template)
|
||||||
|
|
||||||
# set up initial values
|
# set up initial values
|
||||||
oquery.searchTerms = urllib.quote_plus(query)
|
oquery.searchTerms = query
|
||||||
oquery.count = max_results
|
oquery.count = max_results
|
||||||
url = oquery.url()
|
url = oquery.url()
|
||||||
|
|
||||||
|
@ -349,7 +349,8 @@ class SearchDialog(QDialog, Ui_Dialog):
|
|||||||
d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
|
d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
|
||||||
if d.exec_() == d.Accepted:
|
if d.exec_() == d.Accepted:
|
||||||
ext = d.format()
|
ext = d.format()
|
||||||
self.gui.download_ebook(result.downloads[ext])
|
fname = result.title + '.' + ext.lower()
|
||||||
|
self.gui.download_ebook(result.downloads[ext], filename=fname)
|
||||||
|
|
||||||
def open_store(self, result):
|
def open_store(self, result):
|
||||||
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
|
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
|
||||||
|
@ -6,12 +6,11 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
|
||||||
class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
|
class ArchiveOrgStore(BasicStoreConfig, OpenSearchOPDSStore):
|
||||||
|
|
||||||
open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml'
|
open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml'
|
||||||
web_url = 'http://www.archive.org/details/texts'
|
web_url = 'http://www.archive.org/details/texts'
|
||||||
@ -19,7 +18,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
|
|||||||
# http://bookserver.archive.org/catalog/
|
# http://bookserver.archive.org/catalog/
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
||||||
s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1]
|
s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1]
|
||||||
s.price = '$0.00'
|
s.price = '$0.00'
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
@ -33,6 +32,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
|
|||||||
from calibre import browser
|
from calibre import browser
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
||||||
idata = html.fromstring(nf.read())
|
idata = html.fromstring(nf.read())
|
||||||
|
@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
|
||||||
class EpubBudStore(BasicStoreConfig, OpenSearchStore):
|
class EpubBudStore(BasicStoreConfig, OpenSearchOPDSStore):
|
||||||
|
|
||||||
open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml'
|
open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml'
|
||||||
web_url = 'http://www.epubbud.com/'
|
web_url = 'http://www.epubbud.com/'
|
||||||
@ -18,7 +18,7 @@ class EpubBudStore(BasicStoreConfig, OpenSearchStore):
|
|||||||
# http://www.epubbud.com/feeds/catalog.atom
|
# http://www.epubbud.com/feeds/catalog.atom
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
||||||
s.price = '$0.00'
|
s.price = '$0.00'
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
s.formats = 'EPUB'
|
s.formats = 'EPUB'
|
||||||
|
@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
|
||||||
class FeedbooksStore(BasicStoreConfig, OpenSearchStore):
|
class FeedbooksStore(BasicStoreConfig, OpenSearchOPDSStore):
|
||||||
|
|
||||||
open_search_url = 'http://assets0.feedbooks.net/opensearch.xml?t=1253087147'
|
open_search_url = 'http://assets0.feedbooks.net/opensearch.xml?t=1253087147'
|
||||||
web_url = 'http://feedbooks.com/'
|
web_url = 'http://feedbooks.com/'
|
||||||
@ -18,7 +18,7 @@ class FeedbooksStore(BasicStoreConfig, OpenSearchStore):
|
|||||||
# http://www.feedbooks.com/catalog
|
# http://www.feedbooks.com/catalog
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
||||||
if s.downloads:
|
if s.downloads:
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
s.price = '$0.00'
|
s.price = '$0.00'
|
||||||
|
@ -6,6 +6,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import mimetypes
|
||||||
import urllib
|
import urllib
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|||||||
class GutenbergStore(BasicStoreConfig, StorePlugin):
|
class GutenbergStore(BasicStoreConfig, StorePlugin):
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
def open(self, parent=None, detail_item=None, external=False):
|
||||||
url = 'http://m.gutenberg.org/'
|
url = 'http://gutenberg.org/'
|
||||||
ext_url = 'http://gutenberg.org/'
|
|
||||||
|
if detail_item:
|
||||||
|
detail_item = url_slash_cleaner(url + detail_item)
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
if external or self.config.get('open_external', False):
|
||||||
if detail_item:
|
open_url(QUrl(detail_item if detail_item else url))
|
||||||
ext_url = ext_url + detail_item
|
|
||||||
open_url(QUrl(url_slash_cleaner(ext_url)))
|
|
||||||
else:
|
else:
|
||||||
detail_url = None
|
d = WebStoreDialog(self.gui, url, parent, detail_item)
|
||||||
if detail_item:
|
|
||||||
detail_url = url + detail_item
|
|
||||||
d = WebStoreDialog(self.gui, url, parent, detail_url)
|
|
||||||
d.setWindowTitle(self.name)
|
d.setWindowTitle(self.name)
|
||||||
d.set_tags(self.config.get('tags', ''))
|
d.set_tags(self.config.get('tags', ''))
|
||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# Gutenberg's website does not allow searching both author and title.
|
url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)
|
||||||
# Using a google search so we can search on both fields at once.
|
|
||||||
url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query)
|
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
|
for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
id = ''.join(data.xpath('./a/@href'))
|
||||||
|
id = id.split('.mobile')[0]
|
||||||
|
|
||||||
url = ''
|
title = ''.join(data.xpath('.//span[@class="title"]/text()'))
|
||||||
url_a = data.xpath('div[@class="jd"]/a')
|
author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))
|
||||||
if url_a:
|
|
||||||
url_a = url_a[0]
|
|
||||||
url = url_a.get('href', None)
|
|
||||||
if url:
|
|
||||||
url = url.split('u=')[-1].split('&')[0]
|
|
||||||
if '/ebooks/' not in url:
|
|
||||||
continue
|
|
||||||
id = url.split('/')[-1]
|
|
||||||
|
|
||||||
url_a = html.fromstring(html.tostring(url_a))
|
|
||||||
heading = ''.join(url_a.xpath('//text()'))
|
|
||||||
title, _, author = heading.rpartition('by ')
|
|
||||||
author = author.split('-')[0]
|
|
||||||
price = '$0.00'
|
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
s = SearchResult()
|
s = SearchResult()
|
||||||
s.cover_url = ''
|
s.cover_url = ''
|
||||||
|
|
||||||
|
s.detail_item = id.strip()
|
||||||
s.title = title.strip()
|
s.title = title.strip()
|
||||||
s.author = author.strip()
|
s.author = author.strip()
|
||||||
s.price = price.strip()
|
s.price = '$0.00'
|
||||||
s.detail_item = '/ebooks/' + id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
url = 'http://m.gutenberg.org/'
|
url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile')
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
|
with closing(br.open(url, timeout=timeout)) as nf:
|
||||||
idata = html.fromstring(nf.read())
|
doc = html.fromstring(nf.read())
|
||||||
search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))
|
|
||||||
return True
|
for save_item in doc.xpath('//li[contains(@class, "icon_save")]/a'):
|
||||||
|
type = save_item.get('type')
|
||||||
|
href = save_item.get('href')
|
||||||
|
|
||||||
|
if type:
|
||||||
|
ext = mimetypes.guess_extension(type)
|
||||||
|
if ext:
|
||||||
|
ext = ext[1:].upper().strip()
|
||||||
|
search_result.downloads[ext] = href
|
||||||
|
|
||||||
|
search_result.formats = ', '.join(search_result.downloads.keys())
|
||||||
|
|
||||||
|
return True
|
||||||
|
@ -6,89 +6,101 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import mimetypes
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import etree
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
from calibre import browser
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
from calibre.utils.opensearch.description import Description
|
||||||
|
from calibre.utils.opensearch.query import Query
|
||||||
|
|
||||||
class ManyBooksStore(BasicStoreConfig, StorePlugin):
|
class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore):
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
open_search_url = 'http://www.manybooks.net/opds/'
|
||||||
url = 'http://manybooks.net/'
|
web_url = 'http://manybooks.net'
|
||||||
|
|
||||||
detail_url = None
|
|
||||||
if detail_item:
|
|
||||||
detail_url = url + detail_item
|
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
|
||||||
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
|
|
||||||
else:
|
|
||||||
d = WebStoreDialog(self.gui, url, parent, detail_url)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# ManyBooks website separates results for title and author.
|
'''
|
||||||
# It also doesn't do a clear job of references authors and
|
Manybooks uses a very strange opds feed. The opds
|
||||||
# secondary titles. Google is also faster.
|
main feed is structured like a stanza feed. The
|
||||||
# Using a google search so we can search on both fields at once.
|
search result entries give very little information
|
||||||
url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query)
|
and require you to go to a detail link. The detail
|
||||||
|
link has the wrong type specified (text/html instead
|
||||||
|
of application/atom+xml).
|
||||||
|
'''
|
||||||
|
if not hasattr(self, 'open_search_url'):
|
||||||
|
return
|
||||||
|
|
||||||
br = browser()
|
description = Description(self.open_search_url)
|
||||||
|
url_template = description.get_best_template()
|
||||||
|
if not url_template:
|
||||||
|
return
|
||||||
|
oquery = Query(url_template)
|
||||||
|
|
||||||
|
# set up initial values
|
||||||
|
oquery.searchTerms = query
|
||||||
|
oquery.count = max_results
|
||||||
|
url = oquery.url()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
|
br = browser()
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = etree.fromstring(f.read())
|
||||||
for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
|
for data in doc.xpath('//*[local-name() = "entry"]'):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
url = ''
|
|
||||||
url_a = data.xpath('div[@class="jd"]/a')
|
|
||||||
if url_a:
|
|
||||||
url_a = url_a[0]
|
|
||||||
url = url_a.get('href', None)
|
|
||||||
if url:
|
|
||||||
url = url.split('u=')[-1][:-2]
|
|
||||||
if '/titles/' not in url:
|
|
||||||
continue
|
|
||||||
id = url.split('/')[-1]
|
|
||||||
id = id.strip()
|
|
||||||
|
|
||||||
url_a = html.fromstring(html.tostring(url_a))
|
|
||||||
heading = ''.join(url_a.xpath('//text()'))
|
|
||||||
title, _, author = heading.rpartition('by ')
|
|
||||||
author = author.split('-')[0]
|
|
||||||
price = '$0.00'
|
|
||||||
|
|
||||||
cover_url = ''
|
|
||||||
mo = re.match('^\D+', id)
|
|
||||||
if mo:
|
|
||||||
cover_name = mo.group()
|
|
||||||
cover_name = cover_name.replace('etext', '')
|
|
||||||
cover_id = id.split('.')[0]
|
|
||||||
cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg'
|
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
s = SearchResult()
|
s = SearchResult()
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
detail_links = data.xpath('./*[local-name() = "link" and @type = "text/html"]')
|
||||||
s.author = author.strip()
|
if not detail_links:
|
||||||
s.price = price.strip()
|
continue
|
||||||
s.detail_item = '/titles/' + id
|
detail_link = detail_links[0]
|
||||||
|
detail_href = detail_link.get('href')
|
||||||
|
if not detail_href:
|
||||||
|
continue
|
||||||
|
|
||||||
|
s.detail_item = 'http://manybooks.net/titles/' + detail_href.split('tid=')[-1] + '.html'
|
||||||
|
# These can have HTML inside of them. We are going to get them again later
|
||||||
|
# just in case.
|
||||||
|
s.title = ''.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
|
||||||
|
s.author = ', '.join(data.xpath('./*[local-name() = "author"]//text()')).strip()
|
||||||
|
|
||||||
|
# Follow the detail link to get the rest of the info.
|
||||||
|
with closing(br.open(detail_href, timeout=timeout/4)) as df:
|
||||||
|
ddoc = etree.fromstring(df.read())
|
||||||
|
ddata = ddoc.xpath('//*[local-name() = "entry"][1]')
|
||||||
|
if ddata:
|
||||||
|
ddata = ddata[0]
|
||||||
|
|
||||||
|
# This is the real title and author info we want. We got
|
||||||
|
# it previously just in case it's not specified here for some reason.
|
||||||
|
s.title = ''.join(ddata.xpath('./*[local-name() = "title"]//text()')).strip()
|
||||||
|
s.author = ', '.join(ddata.xpath('./*[local-name() = "author"]//text()')).strip()
|
||||||
|
if s.author.startswith(','):
|
||||||
|
s.author = s.author[1:]
|
||||||
|
if s.author.endswith(','):
|
||||||
|
s.author = s.author[:-1]
|
||||||
|
|
||||||
|
s.cover_url = ''.join(ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/thumbnail"][1]/@href')).strip()
|
||||||
|
|
||||||
|
for link in ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
|
||||||
|
type = link.get('type')
|
||||||
|
href = link.get('href')
|
||||||
|
if type:
|
||||||
|
ext = mimetypes.guess_extension(type)
|
||||||
|
if ext:
|
||||||
|
ext = ext[1:].upper().strip()
|
||||||
|
s.downloads[ext] = href
|
||||||
|
|
||||||
|
s.price = '$0.00'
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'
|
s.formats = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
@ -1,84 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
import urllib2
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|
||||||
|
|
||||||
class OpenLibraryStore(BasicStoreConfig, StorePlugin):
|
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
|
||||||
url = 'http://openlibrary.org/'
|
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
|
||||||
if detail_item:
|
|
||||||
url = url + detail_item
|
|
||||||
open_url(QUrl(url_slash_cleaner(url)))
|
|
||||||
else:
|
|
||||||
detail_url = None
|
|
||||||
if detail_item:
|
|
||||||
detail_url = url + detail_item
|
|
||||||
d = WebStoreDialog(self.gui, url, parent, detail_url)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
|
||||||
url = 'http://openlibrary.org/search?q=' + urllib2.quote(query) + '&has_fulltext=true'
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
|
|
||||||
counter = max_results
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
for data in doc.xpath('//div[@id="searchResults"]/ul[@id="siteSearch"]/li'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Don't include books that don't have downloadable files.
|
|
||||||
if not data.xpath('boolean(./span[@class="actions"]//span[@class="label" and contains(text(), "Read")])'):
|
|
||||||
continue
|
|
||||||
id = ''.join(data.xpath('./span[@class="bookcover"]/a/@href'))
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
cover_url = ''.join(data.xpath('./span[@class="bookcover"]/a/img/@src'))
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//h3[@class="booktitle"]/a[@class="results"]/text()'))
|
|
||||||
author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()'))
|
|
||||||
price = '$0.00'
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = price
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
|
||||||
url = 'http://openlibrary.org/'
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
with closing(br.open(url_slash_cleaner(url + search_result.detail_item), timeout=timeout)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
search_result.formats = ', '.join(list(set(idata.xpath('//a[contains(@title, "Download")]/text()'))))
|
|
||||||
return True
|
|
@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
|
||||||
class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore):
|
class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchOPDSStore):
|
||||||
|
|
||||||
open_search_url = 'http://pragprog.com/catalog/search-description'
|
open_search_url = 'http://pragprog.com/catalog/search-description'
|
||||||
web_url = 'http://pragprog.com/'
|
web_url = 'http://pragprog.com/'
|
||||||
@ -18,7 +18,7 @@ class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore):
|
|||||||
# http://pragprog.com/catalog.opds
|
# http://pragprog.com/catalog.opds
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
s.formats = 'EPUB, PDF, MOBI'
|
s.formats = 'EPUB, PDF, MOBI'
|
||||||
yield s
|
yield s
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
'''
|
||||||
|
Based on the OpenSearch Python module by Ed Summers <ehs@pobox.com> from
|
||||||
|
https://github.com/edsu/opensearch .
|
||||||
|
|
||||||
|
This module is heavily modified and does not implement all the features from
|
||||||
|
the original. The ability for the module to perform a search and retrieve
|
||||||
|
search results has been removed. The original module used a modified version
|
||||||
|
of the Universal feed parser from http://feedparser.org/ . The use of
|
||||||
|
FeedParser made getting search results very slow. There is also a bug in the
|
||||||
|
modified FeedParser that causes the system to run out of file descriptors.
|
||||||
|
|
||||||
|
Instead of fixing the modified feed parser it was decided to remove it and
|
||||||
|
manually parse the feeds in a set of type specific classes. This is much
|
||||||
|
faster and as we know in advance the feed format is simpler than using
|
||||||
|
FeedParser. Also, replacing the modified FeedParser with the newest version
|
||||||
|
of FeedParser caused some feeds to be parsed incorrectly and result in a loss
|
||||||
|
of data.
|
||||||
|
|
||||||
|
The module was also rewritten to use lxml instead of MiniDom.
|
||||||
|
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
description = Description(open_search_url)
|
||||||
|
url_template = description.get_best_template()
|
||||||
|
if not url_template:
|
||||||
|
return
|
||||||
|
query = Query(url_template)
|
||||||
|
|
||||||
|
# set up initial values.
|
||||||
|
query.searchTerms = search_terms
|
||||||
|
# Note the count is ignored by some feeds.
|
||||||
|
query.count = max_results
|
||||||
|
|
||||||
|
search_url = query.url()
|
||||||
|
|
||||||
|
'''
|
@ -40,7 +40,7 @@ class Description(object):
|
|||||||
with closing(br.open(url, timeout=15)) as f:
|
with closing(br.open(url, timeout=15)) as f:
|
||||||
doc = etree.fromstring(f.read())
|
doc = etree.fromstring(f.read())
|
||||||
|
|
||||||
# version 1.1 has repeating Url elements
|
# version 1.1 has repeating Url elements.
|
||||||
self.urls = []
|
self.urls = []
|
||||||
for element in doc.xpath('//*[local-name() = "Url"]'):
|
for element in doc.xpath('//*[local-name() = "Url"]'):
|
||||||
template = element.get('template')
|
template = element.get('template')
|
||||||
@ -50,9 +50,22 @@ class Description(object):
|
|||||||
url.template = template
|
url.template = template
|
||||||
url.type = type
|
url.type = type
|
||||||
self.urls.append(url)
|
self.urls.append(url)
|
||||||
|
# Stanza catalogs.
|
||||||
|
for element in doc.xpath('//*[local-name() = "link"]'):
|
||||||
|
if element.get('rel') != 'search':
|
||||||
|
continue
|
||||||
|
href = element.get('href')
|
||||||
|
type = element.get('type')
|
||||||
|
if href and type:
|
||||||
|
url = URL()
|
||||||
|
url.template = href
|
||||||
|
url.type = type
|
||||||
|
self.urls.append(url)
|
||||||
|
|
||||||
# this is version 1.0 specific
|
# this is version 1.0 specific.
|
||||||
self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()'))
|
self.url = ''
|
||||||
|
if not self.urls:
|
||||||
|
self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()'))
|
||||||
self.format = ''.join(doc.xpath('//*[local-name() = "Format"][1]//text()'))
|
self.format = ''.join(doc.xpath('//*[local-name() = "Format"][1]//text()'))
|
||||||
|
|
||||||
self.shortname = ''.join(doc.xpath('//*[local-name() = "ShortName"][1]//text()'))
|
self.shortname = ''.join(doc.xpath('//*[local-name() = "ShortName"][1]//text()'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user