From c3e3d2d85de1d82ecb6bbde469c1961306c268f7 Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Sat, 21 Feb 2026 22:33:20 +0100
Subject: [PATCH] [feat] engines: add pexels engine

---
 requirements.txt        |   1 +
 searx/engines/pexels.py | 128 ++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml      |   4 ++
 3 files changed, 133 insertions(+)
 create mode 100644 searx/engines/pexels.py

diff --git a/requirements.txt b/requirements.txt
index 07429f7b0..83d484d67 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,4 @@ typer==0.24.0
 isodate==0.7.2
 whitenoise==6.11.0
 typing-extensions==4.15.0
+cloudscraper==1.2.71
diff --git a/searx/engines/pexels.py b/searx/engines/pexels.py
new file mode 100644
index 000000000..57d2d1424
--- /dev/null
+++ b/searx/engines/pexels.py
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Pexels (images)
+
+The secret API key is scraped from the JS bundles of the Pexels website and
+then used to query the site's internal JSON search API.
+"""
+
+import re
+
+from urllib.parse import urlencode
+from lxml import html
+
+import cloudscraper
+
+from searx.result_types import EngineResults
+from searx.utils import eval_xpath_list
+from searx.enginelib import EngineCache
+from searx.exceptions import SearxEngineAPIException
+from searx.network import get
+
+
+# about
+about = {
+    "website": 'https://www.pexels.com',
+    "wikidata_id": 'Q101240504',
+    "official_api_documentation": 'https://www.pexels.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+base_url = 'https://www.pexels.com'
+categories = ['images']
+results_per_page = 20
+
+paging = True
+time_range_support = True
+time_range_map = {'day': 'last_24_hours', 'week': 'last_week', 'month': 'last_month', 'year': 'last_year'}
+
+# The key is embedded in one of the site's scripts as '"secret-key":"<value>"'.
+# NOTE: a raw string is required here -- in a plain string literal '\b' is a
+# backspace character, not a regex token; '\s*' tolerates optional whitespace.
+SECRET_KEY_RE = re.compile(r'"secret-key":\s*"(.*?)"')
+SECRET_KEY_DB_KEY = "secret-key"
+
+
+CACHE: EngineCache
+"""Cache to store the secret API key for the engine."""
+
+
+def init(engine_settings):
+    """Set up the per-engine cache that stores the secret API key."""
+    global CACHE  # pylint: disable=global-statement
+    CACHE = EngineCache(engine_settings["name"])
+
+
+def _get_secret_key():
+    """Scrape the secret API key from the scripts of the Pexels start page.
+
+    Raises SearxEngineAPIException if the key can't be found in any script.
+    """
+    # cloudscraper is required to get past the Cloudflare bot challenge
+    scraper = cloudscraper.create_scraper()
+    resp = scraper.get(base_url)
+    if resp.status_code != 200:
+        raise SearxEngineAPIException("failed to obtain secret key")
+
+    doc = html.fromstring(resp.text)
+    for script_src in eval_xpath_list(doc, "//script/@src"):
+        script = get(script_src)
+        if script.status_code != 200:
+            # the key may still be found in one of the remaining scripts
+            continue
+
+        match = SECRET_KEY_RE.search(script.text)
+        if match:
+            return match.group(1)
+
+    # all scripts checked, but secret key was not found
+    raise SearxEngineAPIException("failed to obtain secret key")
+
+
+def request(query, params):
+    """Build the request against the internal Pexels JSON search API."""
+    args = {
+        'query': query,
+        'page': params['pageno'],
+        'per_page': results_per_page,
+    }
+    if params['time_range']:
+        args['date_from'] = time_range_map[params['time_range']]
+
+    params["url"] = f"{base_url}/en-us/api/v3/search/photos?{urlencode(args)}"
+
+    # obtain the secret key only once and cache it for future requests
+    secret_key = CACHE.get(SECRET_KEY_DB_KEY)
+    if not secret_key:
+        secret_key = _get_secret_key()
+        CACHE.set(SECRET_KEY_DB_KEY, secret_key)
+
+    params["headers"]["secret-key"] = secret_key
+
+    return params
+
+
+def response(resp):
+    """Parse the JSON answer of the search API into image results."""
+    res = EngineResults()
+    json_data = resp.json()
+
+    for result in json_data.get('data', []):
+        attrs = result["attributes"]
+        res.add(
+            res.types.LegacyResult(
+                {
+                    'template': 'images.html',
+                    'url': f"{base_url}/photo/{attrs['slug']}-{attrs['id']}/",
+                    'title': attrs["title"],
+                    'content': attrs["description"],
+                    'thumbnail_src': attrs["image"]["small"],
+                    'img_src': attrs["image"]["download_link"],
+                    'resolution': f"{attrs['width']}x{attrs['height']}",
+                    'author': attrs['user']['username'],
+                }
+            )
+        )
+
+    return res
diff --git a/searx/settings.yml b/searx/settings.yml
index 6fb24adf8..580f29d0f 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1539,6 +1539,10 @@ engines:
     # Hide obsolete PDB entries. Default is not to hide obsolete structures
     # hide_obsolete: false
 
+  - name: pexels
+    engine: pexels
+    shortcut: pe
+
   - name: photon
     engine: photon
     shortcut: ph