mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Store: OpenSearch based store base class. OpenSearch module added. Make some OPDS store use the new OpenSearchStore class.
This commit is contained in:
parent
4c6aa0364f
commit
8ae7d310e8
@ -6,84 +6,35 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
from calibre import browser
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
|
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|
||||||
|
|
||||||
class ArchiveOrgStore(BasicStoreConfig, StorePlugin):
|
class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
|
||||||
url = 'http://www.archive.org/details/texts'
|
|
||||||
|
|
||||||
if detail_item:
|
|
||||||
detail_item = url_slash_cleaner('http://www.archive.org' + detail_item)
|
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
|
||||||
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
|
|
||||||
else:
|
|
||||||
d = WebStoreDialog(self.gui, url, parent, detail_item)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
|
open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml'
|
||||||
|
web_url = 'http://www.archive.org/details/texts'
|
||||||
|
|
||||||
|
# http://bookserver.archive.org/catalog/
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
query = query + ' AND mediatype:texts'
|
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
||||||
url = 'http://www.archive.org/search.php?query=' + urllib.quote(query)
|
s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1]
|
||||||
|
s.price = '$0.00'
|
||||||
br = browser()
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
yield s
|
||||||
counter = max_results
|
'''
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
for data in doc.xpath('//td[@class="hitCell"]'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//a[@class="titleLink"]/@href'))
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//a[@class="titleLink"]//text()'))
|
|
||||||
authors = data.xpath('.//text()')
|
|
||||||
if not authors:
|
|
||||||
continue
|
|
||||||
author = None
|
|
||||||
for a in authors:
|
|
||||||
if '-' in a:
|
|
||||||
author = a.replace('-', ' ').strip()
|
|
||||||
if author:
|
|
||||||
break
|
|
||||||
if not author:
|
|
||||||
continue
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = '$0.00'
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
url = url_slash_cleaner('http://www.archive.org' + search_result.detail_item)
|
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(url, timeout=timeout)) as nf:
|
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
||||||
idata = html.fromstring(nf.read())
|
idata = html.fromstring(nf.read())
|
||||||
formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
|
formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
|
||||||
search_result.formats = formats.upper()
|
search_result.formats = formats.upper()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
'''
|
||||||
|
72
src/calibre/gui2/store/opensearch_store.py
Normal file
72
src/calibre/gui2/store/opensearch_store.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import mimetypes
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
from PyQt4.Qt import QUrl
|
||||||
|
|
||||||
|
from calibre.gui2 import open_url
|
||||||
|
from calibre.gui2.store import StorePlugin
|
||||||
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||||
|
from calibre.utils.opensearch import Client
|
||||||
|
|
||||||
|
class OpenSearchStore(StorePlugin):
    '''
    Base class for store plugins that search an OPDS catalog through an
    OpenSearch description document.

    Subclasses set :attr:`open_search_url` (URL of the OpenSearch
    description XML) and :attr:`web_url` (the store's human-browsable
    front page).
    '''

    # URL of the OpenSearch description document used for searching.
    open_search_url = ''
    # URL of the store's browsable web front page.
    web_url = ''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open detail_item (or the store front page) either in an external
        browser or in the embedded web store dialog.
        '''
        if external or self.config.get('open_external', False):
            # BUG FIX: this used self.url, which is never defined on this
            # class; the front page attribute is web_url.
            open_url(QUrl(detail_item if detail_item else self.web_url))
        else:
            d = WebStoreDialog(self.gui, self.web_url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Yield SearchResult objects for query, at most max_results of them.
        '''
        # The previous hasattr() check was always True because the class
        # attribute exists; what matters is that a subclass actually set
        # a non-empty URL.
        if not self.open_search_url:
            return

        client = Client(self.open_search_url)
        results = client.search(urllib.quote_plus(query), max_results)

        counter = max_results
        for r in results:
            if counter <= 0:
                break
            counter -= 1

            s = SearchResult()
            s.detail_item = r.get('id', '')

            # 'links' may be absent or None in the parsed feed entry;
            # guard before iterating (the old code crashed on None).
            for l in r.get('links', None) or []:
                rel = l.get('rel', None)
                if rel == u'http://opds-spec.org/image/thumbnail':
                    s.cover_url = l.get('href', '')
                elif rel == u'http://opds-spec.org/acquisition/buy':
                    s.detail_item = l.get('href', s.detail_item)
                elif rel == u'http://opds-spec.org/acquisition':
                    s.downloads.append((l.get('type', ''), l.get('href', '')))

            # Derive a display list of formats from the download mimetypes.
            formats = []
            for mime, url in s.downloads:
                ext = mimetypes.guess_extension(mime)
                if ext:
                    # guess_extension returns '.epub' etc; drop the dot
                    formats.append(ext[1:])
            s.formats = ', '.join(formats)

            s.title = r.get('title', '')
            s.author = r.get('author', '')
            s.price = r.get('price', '')

            yield s
|
@ -6,79 +6,19 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
|
from calibre.gui2.store.opensearch_store import OpenSearchStore
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|
||||||
|
|
||||||
class PragmaticBookshelfStore(BasicStoreConfig, StorePlugin):
|
class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore):
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
open_search_url = 'http://pragprog.com/catalog/search-description'
|
||||||
url = 'http://pragprog.com/'
|
web_url = 'http://pragprog.com/'
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
# http://pragprog.com/catalog.opds
|
||||||
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
|
|
||||||
else:
|
|
||||||
d = WebStoreDialog(self.gui, url, parent, detail_item)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
'''
|
for s in OpenSearchStore.search(self, query, max_results, timeout):
|
||||||
OPDS based search.
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
s.formats = 'EPUB, PDF, MOBI'
|
||||||
We really should get the catelog from http://pragprog.com/catalog.opds
|
yield s
|
||||||
and look for the application/opensearchdescription+xml entry.
|
|
||||||
Then get the opensearch description to get the search url and
|
|
||||||
format. However, we are going to be lazy and hard code it.
|
|
||||||
'''
|
|
||||||
url = 'http://pragprog.com/catalog/search?q=' + urllib.quote_plus(query)
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
|
|
||||||
counter = max_results
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
# Use html instead of etree as html allows us
|
|
||||||
# to ignore the namespace easily.
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
for data in doc.xpath('//entry'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/acquisition/buy"]/@href'))
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
price = ''.join(data.xpath('.//price/@currencycode')).strip()
|
|
||||||
price += ' '
|
|
||||||
price += ''.join(data.xpath('.//price/text()')).strip()
|
|
||||||
if not price.strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/cover"]/@href'))
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//title/text()'))
|
|
||||||
author = ''.join(data.xpath('.//author//text()'))
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = price.strip()
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
s.formats = 'EPUB, PDF, MOBI'
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
@ -22,6 +22,7 @@ class SearchResult(object):
|
|||||||
self.detail_item = ''
|
self.detail_item = ''
|
||||||
self.drm = None
|
self.drm = None
|
||||||
self.formats = ''
|
self.formats = ''
|
||||||
|
self.downloads = []
|
||||||
self.affiliate = False
|
self.affiliate = False
|
||||||
self.plugin_author = ''
|
self.plugin_author = ''
|
||||||
|
|
||||||
|
4
src/calibre/utils/opensearch/__init__.py
Normal file
4
src/calibre/utils/opensearch/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from description import Description
|
||||||
|
from query import Query
|
||||||
|
from client import Client
|
||||||
|
from results import Results
|
39
src/calibre/utils/opensearch/client.py
Normal file
39
src/calibre/utils/opensearch/client.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
from description import Description
|
||||||
|
from query import Query
|
||||||
|
from results import Results
|
||||||
|
|
||||||
|
class Client:

    """Convenience front-end for issuing OpenSearch queries.

    Construct it with the URL of an OpenSearch description document and
    call :meth:`search` to get back an iterable ``Results`` object.

    ``Results`` transparently requests further pages from the server as
    iteration proceeds, so the caller never has to deal with paging:

        from opensearch import Client
        client = Client(description_url)
        results = client.search("computer")
        for result in results:
            print result.title
    """

    def __init__(self, url, agent="python-opensearch <https://github.com/edsu/opensearch>"):
        self.agent = agent
        self.description = Description(url, self.agent)

    def search(self, search_terms, page_size=25):
        """Perform a search and get back a results object
        """
        # pick the best search template advertised by the description
        template = self.description.get_best_template()
        search_query = Query(template)

        # seed the query with the caller's terms and desired page size
        search_query.searchTerms = search_terms
        search_query.count = page_size

        return Results(search_query, agent=self.agent)
|
||||||
|
|
127
src/calibre/utils/opensearch/description.py
Normal file
127
src/calibre/utils/opensearch/description.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
from urllib2 import urlopen, Request
|
||||||
|
from xml.dom.minidom import parse
|
||||||
|
from url import URL
|
||||||
|
|
||||||
|
class Description:
    """A class for representing OpenSearch Description files.
    """

    def __init__(self, url="", agent=""):
        """The constructor which may pass an optional url to load from.

            d = Description("http://www.example.com/description")
        """
        self.agent = agent
        if url:
            self.load(url)

    def load(self, url):
        """For loading up a description object from a url. Normally
        you'll probably just want to pass a URL into the constructor.
        """
        req = Request(url, headers={'User-Agent': self.agent})
        self.dom = parse(urlopen(req))

        # version 1.1 has repeating Url elements
        self.urls = self._get_urls()

        # this is version 1.0 specific
        self.url = self._get_element_text('Url')
        self.format = self._get_element_text('Format')

        self.shortname = self._get_element_text('ShortName')
        self.longname = self._get_element_text('LongName')
        self.description = self._get_element_text('Description')
        self.image = self._get_element_text('Image')
        self.samplesearch = self._get_element_text('SampleSearch')
        self.developer = self._get_element_text('Developer')
        self.contact = self._get_element_text('Contact')
        self.attribution = self._get_element_text('Attribution')
        self.syndicationright = self._get_element_text('SyndicationRight')

        tag_text = self._get_element_text('Tags')
        if tag_text is not None:
            self.tags = tag_text.split(" ")

        if self._get_element_text('AdultContent') == 'true':
            self.adultcontent = True
        else:
            self.adultcontent = False

    def get_url_by_type(self, type):
        """Walks available urls and returns them by type. Only
        appropriate in opensearch v1.1 where there can be multiple
        query targets. Returns None if no such type is found.

            url = description.get_url_by_type('application/rss+xml')
        """
        for url in self.urls:
            if url.type == type:
                return url
        return None

    def get_best_template(self):
        """OK, best is a value judgement, but so be it. You'll get
        back either the atom, rss or first template available. This
        method handles the main difference between opensearch v1.0 and v1.1
        """
        # version 1.0
        if self.url:
            return self.url

        # atom
        if self.get_url_by_type('application/atom+xml'):
            return self.get_url_by_type('application/atom+xml').template

        # rss
        if self.get_url_by_type('application/rss+xml'):
            return self.get_url_by_type('application/rss+xml').template

        # other possible rss type
        # BUG FIX: this called the nonexistent get_url_by_Type (typo)
        if self.get_url_by_type('text/xml'):
            return self.get_url_by_type('text/xml').template

        # otherwise just the first one
        if len(self.urls) > 0:
            return self.urls[0].template

        # out of luck
        # BUG FIX: was `return Nil`, a NameError at runtime
        return None

    # these are internal methods for querying xml

    def _get_element_text(self, tag):
        # Text of the first matching element, or None if absent.
        elements = self._get_elements(tag)
        if not elements:
            return None
        return self._get_text(elements[0].childNodes)

    def _get_attribute_text(self, tag, attribute):
        # Value of the named attribute on the first matching element,
        # or '' if the element is absent.
        elements = self._get_elements(tag)
        if not elements:
            return ''
        # BUG FIX: the attribute parameter was ignored and 'template'
        # was hard-coded here
        return elements[0].getAttribute(attribute)

    def _get_elements(self, tag):
        return self.dom.getElementsByTagName(tag)

    def _get_text(self, nodes):
        # Concatenate the text children only, stripped of surrounding
        # whitespace.
        text = ''
        for node in nodes:
            if node.nodeType == node.TEXT_NODE:
                text += node.data
        return text.strip()

    def _get_urls(self):
        # Build URL objects from every v1.1 <Url> element that carries
        # both a template and a type.
        urls = []
        for element in self._get_elements('Url'):
            template = element.getAttribute('template')
            type = element.getAttribute('type')
            if template and type:
                url = URL()
                url.template = template
                url.type = type
                urls.append(url)
        return urls
|
2837
src/calibre/utils/opensearch/osfeedparser.py
Normal file
2837
src/calibre/utils/opensearch/osfeedparser.py
Normal file
File diff suppressed because it is too large
Load Diff
66
src/calibre/utils/opensearch/query.py
Normal file
66
src/calibre/utils/opensearch/query.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
from urlparse import urlparse, urlunparse
|
||||||
|
from urllib import urlencode
|
||||||
|
from cgi import parse_qs
|
||||||
|
|
||||||
|
class Query:
    """Represents an opensearch query. Used internally by the Client to
    construct an opensearch url to request. Really this class is just a
    helper for substituting values into the macros in a format.

        format = 'http://beta.indeed.com/opensearch?q={searchTerms}&start={startIndex}&limit={count}'
        q = Query(format)
        q.searchTerms = 'zx81'
        q.startIndex = 1
        q.count = 25
        print q.url()
    """
    # NOTE: the docstring example previously showed q.searchTerms('zx81')
    # and q.to_url(); macros are set by attribute assignment and the
    # method is url().

    standard_macros = ['searchTerms', 'count', 'startIndex', 'startPage',
                       'language', 'outputEncoding', 'inputEncoding']

    def __init__(self, format):
        """Create a query object by passing it the url format obtained
        from the opensearch Description.
        """
        self.format = format

        # unpack the url to a tuple
        self.url_parts = urlparse(format)

        # unpack the query string to a dictionary
        self.query_string = parse_qs(self.url_parts[4])

        # look for standard macros and create a mapping of the
        # opensearch names to the service specific ones
        # so q={searchTerms} will result in a mapping between searchTerms and q
        self.macro_map = {}
        for key, values in self.query_string.items():
            # TODO eventually optional/required params should be
            # distinguished somehow (the ones with/without trailing ?)
            macro = values[0].replace('{', '').replace('}', '').replace('?', '')
            if macro in Query.standard_macros:
                self.macro_map[macro] = key

    def url(self):
        """Return the url for this query with all set macros substituted
        in and all unset macro placeholders removed.
        """
        # copy the original query string
        query_string = dict(self.query_string)

        # iterate through macros and set the position in the querystring
        for macro, name in self.macro_map.items():
            if hasattr(self, macro):
                # set the name/value pair
                query_string[name] = [getattr(self, macro)]
            else:
                # remove the name/value pair for unset macros
                del query_string[name]

        # copy the url parts and substitute in our new query string
        url_parts = list(self.url_parts)
        url_parts[4] = urlencode(query_string, 1)

        # recompose and return url
        return urlunparse(tuple(url_parts))

    def has_macro(self, macro):
        """True if this query's template contains the given macro."""
        # `in` instead of the Python-2-only dict.has_key()
        return macro in self.macro_map
|
||||||
|
|
131
src/calibre/utils/opensearch/results.py
Normal file
131
src/calibre/utils/opensearch/results.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import osfeedparser
|
||||||
|
|
||||||
|
class Results(object):
    """Iterable sequence of opensearch results.

    Wraps one page of results fetched through osfeedparser and, while
    being iterated, transparently requests further pages from the
    server until the reported total has been produced.
    """

    def __init__(self, query, agent=None):
        self.agent = agent
        self._fetch(query)
        self._iter = 0

    def __iter__(self):
        self._iter = 0
        return self

    def __len__(self):
        # the total the server claims, not just the items fetched so far
        return self.totalResults

    def next(self):
        # just keep going like the energizer bunny
        while True:
            # return any item we haven't returned
            if self._iter < len(self.items):
                self._iter += 1
                return self.items[self._iter - 1]

            # if there appears to be more to fetch
            if self.totalResults != 0 \
                    and self.totalResults > self.startIndex + self.itemsPerPage - 1:
                # get the next query
                next_query = self._get_next_query()

                # if we got one execute it and go back to the beginning
                if next_query:
                    self._fetch(next_query)
                    # very important to reset this counter
                    # or else the return will fail
                    self._iter = 0
                    continue

                # BUG FIX: previously this fell through and spun forever
                # when no further query could be constructed
                raise StopIteration
            else:
                raise StopIteration

    # Python 3 iterator protocol compatibility (harmless under Python 2)
    __next__ = next

    def _fetch(self, query):
        # Parse one page of results and pull out the channel-level and
        # opensearch-specific metadata.
        feed = osfeedparser.opensearch_parse(query.url(), agent=self.agent)
        self.feed = feed

        # general channel stuff
        channel = feed['feed']
        self.title = _pick(channel, 'title')
        self.link = _pick(channel, 'link')
        self.description = _pick(channel, 'description')
        self.language = _pick(channel, 'language')
        self.copyright = _pick(channel, 'copyright')

        # get back opensearch specific values
        self.totalResults = _pick(channel, 'opensearch_totalresults', 0)
        self.startIndex = _pick(channel, 'opensearch_startindex', 1)
        self.itemsPerPage = _pick(channel, 'opensearch_itemsperpage', 0)

        # alias items from the feed to our results object
        self.items = feed['items']

        # set default values if necessary
        if self.startIndex == 0:
            self.startIndex = 1
        if self.itemsPerPage == 0 and len(self.items) > 0:
            self.itemsPerPage = len(self.items)

        # store away query for calculating next results if necessary
        self.last_query = query

    def _get_next_query(self):
        # update our query to get the next set of records
        query = self.last_query

        # use start page if the query supports it
        if query.has_macro('startPage'):
            # if the query already defined the startPage
            # we just need to increment it
            if hasattr(query, 'startPage'):
                query.startPage += 1
            # to issue the first query startPage might not have
            # been specified, so set it to 2
            else:
                query.startPage = 2
            return query

        # otherwise the query should support startIndex
        elif query.has_macro('startIndex'):
            # if startIndex was used before we just add the
            # items per page to it to get the next set
            if hasattr(query, 'startIndex'):
                query.startIndex += self.itemsPerPage
            # to issue the first query the startIndex may have
            # been left blank in that case we assume it to be
            # the item just after the last one on this page
            else:
                query.startIndex = self.itemsPerPage + 1
            return query

        # doesn't look like there is another stage to this query
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# helper for pulling values out of a dictionary if they're there
|
||||||
|
# and returning a default value if they're not
|
||||||
|
def _pick(d,key,default=None):
|
||||||
|
|
||||||
|
# get the value out
|
||||||
|
value = d.get(key)
|
||||||
|
|
||||||
|
# if it wasn't there return the default
|
||||||
|
if value == None:
|
||||||
|
return default
|
||||||
|
|
||||||
|
# if they want an int try to convert to an int
|
||||||
|
# and return default if it fails
|
||||||
|
if type(default) == int:
|
||||||
|
try:
|
||||||
|
return int(d[key])
|
||||||
|
except:
|
||||||
|
return default
|
||||||
|
|
||||||
|
# otherwise we're good to return the value
|
||||||
|
return value
|
||||||
|
|
8
src/calibre/utils/opensearch/url.py
Normal file
8
src/calibre/utils/opensearch/url.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
class URL:
    """Class for representing a URL in an opensearch v1.1 query"""

    def __init__(self, type='', template='', method='GET'):
        self.type = type
        self.template = template
        # BUG FIX: the method argument was previously ignored and the
        # attribute hard-coded to 'GET'
        self.method = method
        # query parameters associated with this url, filled in by callers
        self.params = []
|
Loading…
x
Reference in New Issue
Block a user