From 58ca9bc7d0cb8e35641573af395b35d644a227ab Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 3 Jul 2011 10:05:43 -0400 Subject: [PATCH] Store: Change class name of opensearch store to allow for easily adding more opensearch results for other feed types. Document opensearch module changes. --- src/calibre/gui2/store/opensearch_store.py | 6 ++- .../gui2/store/stores/archive_org_plugin.py | 8 ++-- .../gui2/store/stores/epubbud_plugin.py | 6 +-- .../gui2/store/stores/feedbooks_plugin.py | 6 +-- .../stores/pragmatic_bookshelf_plugin.py | 6 +-- src/calibre/utils/opensearch/__init__.py | 37 +++++++++++++++++++ 6 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/calibre/gui2/store/opensearch_store.py b/src/calibre/gui2/store/opensearch_store.py index 54fedbd002..6e8f5de7ba 100644 --- a/src/calibre/gui2/store/opensearch_store.py +++ b/src/calibre/gui2/store/opensearch_store.py @@ -22,7 +22,7 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.utils.opensearch.description import Description from calibre.utils.opensearch.query import Query -class OpenSearchStore(StorePlugin): +class OpenSearchOPDSStore(StorePlugin): open_search_url = '' web_url = '' @@ -99,3 +99,7 @@ class OpenSearchStore(StorePlugin): yield s + +class OpenSearchOPDSDetailStore(OpenSearchOPDSStore): + + pass diff --git a/src/calibre/gui2/store/stores/archive_org_plugin.py b/src/calibre/gui2/store/stores/archive_org_plugin.py index 6972c604ce..7439056baa 100644 --- a/src/calibre/gui2/store/stores/archive_org_plugin.py +++ b/src/calibre/gui2/store/stores/archive_org_plugin.py @@ -6,12 +6,11 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' - from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class 
ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): +class ArchiveOrgStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml' web_url = 'http://www.archive.org/details/texts' @@ -19,7 +18,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): # http://bookserver.archive.org/catalog/ def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1] s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED @@ -33,6 +32,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): from calibre import browser from contextlib import closing from lxml import html + br = browser() with closing(br.open(search_result.detail_item, timeout=timeout)) as nf: idata = html.fromstring(nf.read()) diff --git a/src/calibre/gui2/store/stores/epubbud_plugin.py b/src/calibre/gui2/store/stores/epubbud_plugin.py index b4d642f62b..029b2b3fc9 100644 --- a/src/calibre/gui2/store/stores/epubbud_plugin.py +++ b/src/calibre/gui2/store/stores/epubbud_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class EpubBudStore(BasicStoreConfig, OpenSearchStore): +class EpubBudStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml' web_url = 'http://www.epubbud.com/' @@ -18,7 +18,7 @@ class EpubBudStore(BasicStoreConfig, OpenSearchStore): # http://www.epubbud.com/feeds/catalog.atom def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, 
query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED s.formats = 'EPUB' diff --git a/src/calibre/gui2/store/stores/feedbooks_plugin.py b/src/calibre/gui2/store/stores/feedbooks_plugin.py index 96d0a10dc7..cac44fd8df 100644 --- a/src/calibre/gui2/store/stores/feedbooks_plugin.py +++ b/src/calibre/gui2/store/stores/feedbooks_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class FeedbooksStore(BasicStoreConfig, OpenSearchStore): +class FeedbooksStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://assets0.feedbooks.net/opensearch.xml?t=1253087147' web_url = 'http://feedbooks.com/' @@ -18,7 +18,7 @@ class FeedbooksStore(BasicStoreConfig, OpenSearchStore): # http://www.feedbooks.com/catalog def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): if s.downloads: s.drm = SearchResult.DRM_UNLOCKED s.price = '$0.00' diff --git a/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py b/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py index 671186ba87..99b94778bf 100644 --- a/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py +++ b/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from 
calibre.gui2.store.search_result import SearchResult -class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore): +class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://pragprog.com/catalog/search-description' web_url = 'http://pragprog.com/' @@ -18,7 +18,7 @@ class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore): # http://pragprog.com/catalog.opds def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.drm = SearchResult.DRM_UNLOCKED s.formats = 'EPUB, PDF, MOBI' yield s diff --git a/src/calibre/utils/opensearch/__init__.py b/src/calibre/utils/opensearch/__init__.py index e69de29bb2..3d0c4d8787 100644 --- a/src/calibre/utils/opensearch/__init__.py +++ b/src/calibre/utils/opensearch/__init__.py @@ -0,0 +1,37 @@ +''' +Based on the OpenSearch Python module by Ed Summers from +https://github.com/edsu/opensearch . + +This module is heavily modified and does not implement all the features from +the original. The ability for the module to perform a search and retrieve +search results has been removed. The original module used a modified version +of the Universal feed parser from http://feedparser.org/ . The use of +FeedParser made getting search results very slow. There is also a bug in the +modified FeedParser that causes the system to run out of file descriptors. + +Instead of fixing the modified feed parser it was decided to remove it and +manually parse the feeds in a set of type-specific classes. This is much +faster and as we know in advance the feed format is simpler than using +FeedParser. Also, replacing the modified FeedParser with the newest version +of FeedParser caused some feeds to be parsed incorrectly and result in a loss +of data. + +The module was also rewritten to use lxml instead of MiniDom. 
+ + +Usage: + +description = Description(open_search_url) +url_template = description.get_best_template() +if not url_template: + return +query = Query(url_template) + +# set up initial values. +query.searchTerms = urllib.quote_plus(search_terms) +# Note the count is ignored by some feeds. +query.count = max_results + +search_url = query.url() + +'''