From 96e63df8ca187136ea37942fb840220dae8c8766 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 10 Sep 2025 16:42:39 +0200 Subject: [PATCH] [mod] Open Library engine: revision of the engine (Paper result) Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser --- docs/dev/engines/online/openlibrary.rst | 8 ++ searx/engines/openlibrary.py | 124 ++++++++++++++++-------- searx/settings.yml | 2 +- 3 files changed, 90 insertions(+), 44 deletions(-) create mode 100644 docs/dev/engines/online/openlibrary.rst diff --git a/docs/dev/engines/online/openlibrary.rst b/docs/dev/engines/online/openlibrary.rst new file mode 100644 index 000000000..814271a15 --- /dev/null +++ b/docs/dev/engines/online/openlibrary.rst @@ -0,0 +1,8 @@ +.. _openlibrary engine: + +============ +Open Library +============ + +.. automodule:: searx.engines.openlibrary + :members: diff --git a/searx/engines/openlibrary.py b/searx/engines/openlibrary.py index cc1f53541..1c01db600 100644 --- a/searx/engines/openlibrary.py +++ b/searx/engines/openlibrary.py @@ -1,71 +1,109 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Open library (books) -""" -from urllib.parse import urlencode -import re +"""`Open Library`_ is an open, editable library catalog, building towards a web +page for every book ever published. +.. _Open Library: https://openlibrary.org + +Configuration +============= + +The service sometimes takes a very long time to respond, the ``timeout`` may +need to be adjusted. + +.. code:: yaml + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + + +Implementations +=============== + +""" + +from datetime import datetime +import typing as t + +from urllib.parse import urlencode from dateutil import parser +from searx.result_types import EngineResults + +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams + about = { - 'website': 'https://openlibrary.org', - 'wikidata_id': 'Q1201876', - 'require_api_key': False, - 'use_official_api': False, - 'official_api_documentation': 'https://openlibrary.org/developers/api', + "website": "https://openlibrary.org", + "wikidata_id": "Q1201876", + "require_api_key": False, + "use_official_api": False, + "official_api_documentation": "https://openlibrary.org/developers/api", } paging = True -categories = [] +categories = ["general", "books"] base_url = "https://openlibrary.org" +search_api = "https://openlibrary.org/search.json" +"""The engine uses the API at the endpoint search.json_. + +.. _search.json: https://openlibrary.org/dev/docs/api/search +""" results_per_page = 10 -def request(query, params): +def request(query: str, params: "OnlineParams") -> None: args = { - 'q': query, - 'page': params['pageno'], - 'limit': results_per_page, + "q": query, + "page": params["pageno"], + "limit": results_per_page, + "fields": "*", } - params['url'] = f"{base_url}/search.json?{urlencode(args)}" - return params + params["url"] = f"{search_api}?{urlencode(args)}" + logger.debug("REST API: %s", params["url"]) -def _parse_date(date): - try: - return parser.parse(date) - except parser.ParserError: - return None +def response(resp: "SXNG_Response") -> EngineResults: + res = EngineResults() + json_data = resp.json() - -def response(resp): - results = [] - - for item in resp.json().get("docs", []): - cover = None - if 'lending_identifier_s' in item: + for item in json_data.get("docs", []): + cover = "" + if "lending_identifier_s" in item: cover = f"https://archive.org/services/img/{item['lending_identifier_s']}" - published = item.get('publish_date') + published = item.get("publish_date") if published: published_dates = [date for date in map(_parse_date, published) if date] if published_dates: published = min(published_dates) if not published: - published = parser.parse(str(item.get('first_published_year'))) + published = _parse_date(str(item.get("first_publish_year"))) - result = { - 'template': 'paper.html', - 'url': f"{base_url}{item['key']}", - 'title': item['title'], - 'content': re.sub(r"\{|\}", "", item['first_sentence'][0]) if item.get('first_sentence') else '', - 'isbn': item.get('isbn', [])[:5], - 'authors': item.get('author_name', []), - 'thumbnail': cover, - 'publishedDate': published, - 'tags': item.get('subject', [])[:10] + item.get('place', [])[:10], - } - results.append(result) + content = " / ".join(item.get("first_sentence", [])) + res.add( + res.types.Paper( + url=f"{base_url}/{item['key']}", + title=item["title"], + content=content, + isbn=item.get("isbn", [])[:5], + authors=item.get("author_name", []), + thumbnail=cover, + publishedDate=published, + tags=item.get("subject", [])[:10] + item.get("place", [])[:10], + ) + ) + return res - return results + +def _parse_date(date: str) -> datetime | None: + if not date: + return None + try: + return parser.parse(date) + except parser.ParserError: + return None diff --git a/searx/settings.yml b/searx/settings.yml index e34f501d2..4f260cae0 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1517,7 +1517,7 @@ engines: - name: openlibrary engine: openlibrary shortcut: ol - timeout: 5 + timeout: 10 disabled: true - name: openmeteo