[feat] engines: add Azure resources engine (#5235)

Adds a new engine `searx/engines/azure.py` to search cloud resources on Azure.

Many enterprise users have to deal with the Azure Public Cloud.  This engine
lets them search for cloud resources without logging in to the Portal first.

How to test this PR locally?

Create an App Registration in Microsoft Entra ID and grant it Reader access to
the resources you want to search.  Then create a client secret for the App
Registration and set the corresponding values in the `settings.yml` file [1]::

   - name: azure
     engine: azure
     ...
     azure_tenant_id: "your_tenant_id"
     azure_client_id: "your_client_id"
     azure_client_secret: "your_client_secret"
     azure_token_expiration_seconds: 5000

[1] https://github.com/searxng/searxng/pull/5235#issuecomment-3397664928

Co-authored-by: Bnyro <bnyro@tutanota.com>
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Tommaso Colella 2025-10-13 16:33:08 +02:00 committed by GitHub
parent 8baefcc21e
commit c34bb61284
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 207 additions and 0 deletions

View File

@ -0,0 +1,8 @@
.. _azure engine:
===============
Azure Resources
===============
.. automodule:: searx.engines.azure
:members:

190
searx/engines/azure.py Normal file
View File

@ -0,0 +1,190 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine for Azure resources. This engine mimics the standard search bar in Azure
Portal (for resources and resource groups).
Configuration
=============
You must `register an application in Microsoft Entra ID`_ and assign it the
'Reader' role in your subscription.
To use this engine, add an entry similar to the following to your engine list in
``settings.yml``:
.. code:: yaml
- name: azure
engine: azure
...
azure_tenant_id: "your_tenant_id"
azure_client_id: "your_client_id"
azure_client_secret: "your_client_secret"
azure_token_expiration_seconds: 5000
.. _register an application in Microsoft Entra ID:
https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app
"""
import typing as t
from searx.enginelib import EngineCache
from searx.network import post as http_post
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine type and the categories declared for this engine.
engine_type = "online"
categories = ["it", "cloud"]
# Default values, should be overridden in settings.yml
# ``setup`` reports an error and returns False while any of the three
# credentials below is still empty.
azure_tenant_id = ""
azure_client_id = ""
azure_client_secret = ""
# Passed as ``expire`` when ``get_auth_token`` stores a token in the cache.
# NOTE(review): the token endpoint presumably reports its own lifetime
# (``expires_in``) -- confirm this value does not exceed it.
azure_token_expiration_seconds = 5000
"""Time for which an auth token is valid (sec.)"""
# URL that ``request`` sends its POST to.
azure_batch_endpoint = "https://management.azure.com/batch?api-version=2020-06-01"
# Descriptive metadata about the engine.  The backslash at the end of the
# ``official_api_documentation`` line continues the string literal on the next
# line, so any leading whitespace on that next line becomes part of the URL.
about = {
"website": "https://www.portal.azure.com",
"wikidata_id": "Q725967",
"official_api_documentation": "https://learn.microsoft.com/en-us/\
rest/api/azure-resourcegraph/?view=rest-azureresourcegraph-resourcegraph-2024-04-01",
"use_official_api": True,
"require_api_key": True,
"results": "JSON",
"language": "en",
}
# Declared here, assigned in ``setup``; ``get_auth_token`` reads and writes
# tokens through it.
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""
def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Prepare the engine for use.

    - Creates the engine's cache (:py:obj:`CACHE`), named after the engine.
    - Verifies that ``azure_tenant_id``, ``azure_client_id`` and
      ``azure_client_secret`` are all set; if any of them is missing or empty
      an error is logged and ``False`` is returned, which leaves the engine
      inactive.
    """
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])

    required = ("azure_tenant_id", "azure_client_id", "azure_client_secret")
    unset: list[str] = [name for name in required if not engine_settings.get(name, "")]
    if not unset:
        return True

    logger.error("missing values for options: %s", ", ".join(unset))
    return False
def authenticate(t_id: str, c_id: str, c_secret: str) -> str:
    """Obtain an access token via the OAuth2 client credentials flow.

    The credentials are POSTed to the token endpoint of tenant ``t_id`` with
    the scope ``https://management.azure.com/.default``.

    :param t_id: tenant ID, used to build the token endpoint URL.
    :param c_id: client ID of the registered application.
    :param c_secret: client secret of the registered application.
    :return: the ``access_token`` field of the JSON response.
    :raises RuntimeError: if the endpoint answers with a status other than 200.
    """
    form = {
        "client_id": c_id,
        "client_secret": c_secret,
        "grant_type": "client_credentials",
        "scope": "https://management.azure.com/.default",
    }
    token_url = f"https://login.microsoftonline.com/{t_id}/oauth2/v2.0/token"

    resp: SXNG_Response = http_post(token_url, form)
    if resp.status_code == 200:
        return resp.json()["access_token"]

    raise RuntimeError(f"Azure authentication failed (status {resp.status_code}): {resp.text}")
def get_auth_token(t_id: str, c_id: str, c_secret: str) -> str:
    """Return an access token for the given credentials, using the cache.

    A cached token is returned when one exists.  Otherwise a new token is
    requested with :py:func:`authenticate` and stored in :py:obj:`CACHE` for
    ``azure_token_expiration_seconds`` seconds.

    :param t_id: tenant ID.
    :param c_id: client ID of the registered application.
    :param c_secret: client secret of the registered application.
    :return: the access token.
    :raises RuntimeError: propagated from :py:func:`authenticate` when the
        token request fails.
    """
    import hashlib  # pylint: disable=import-outside-toplevel

    # The cache is persistent, so the key must not contain the client secret in
    # plain text.  Only a digest of the credentials is used as the key; it is
    # still unique per (tenant, client, secret) combination.
    credentials = f"azure_tenant_id: {t_id}, azure_client_id: {c_id}, azure_client_secret: {c_secret}"
    key = "azure_token_" + hashlib.sha256(credentials.encode("utf-8")).hexdigest()

    token: str | None = CACHE.get(key)
    if token:
        return token

    token = authenticate(t_id, c_id, c_secret)
    CACHE.set(key=key, value=token, expire=azure_token_expiration_seconds)
    return token
def _kql_quote(value: str) -> str:
    """Return ``value`` as a single-quoted KQL string literal.

    Backslashes and single quotes are backslash-escaped so that the value
    cannot terminate the literal early.
    """
    return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'"


def request(query: str, params: "OnlineParams") -> None:
    """Build the batch request that searches resource groups and resources.

    Fills ``params`` with a POST to ``azure_batch_endpoint`` carrying a bearer
    token and a JSON body with two Resource Graph sub-requests:

    - ``resourceGroups``: resource groups whose name contains ``query``,
      names starting with ``query`` first, at most 30 rows.
    - ``resources``: resources whose name contains ``query``, at most 30 rows.

    :param query: the user's search term; it is escaped before being embedded
        in the KQL queries.
    :param params: request parameters, modified in place.
    """
    token = get_auth_token(azure_tenant_id, azure_client_id, azure_client_secret)

    # The search term is user input: embed it as an escaped string literal so
    # that quotes or backslashes in it can neither break nor alter the query.
    needle = _kql_quote(query)

    params["url"] = azure_batch_endpoint
    params["method"] = "POST"
    params["headers"]["Authorization"] = f"Bearer {token}"
    params["headers"]["Content-Type"] = "application/json"
    params["json"] = {
        "requests": [
            {
                "url": "/providers/Microsoft.ResourceGraph/resources?api-version=2024-04-01",
                "httpMethod": "POST",
                "name": "resourceGroups",
                "requestHeaderDetails": {"commandName": "Microsoft.ResourceGraph"},
                "content": {
                    "query": (
                        f"ResourceContainers"
                        f" | where (name contains ({needle}))"
                        f" | where (type =~ ('Microsoft.Resources/subscriptions/resourcegroups'))"
                        f" | project id,name,type,kind,subscriptionId,resourceGroup"
                        f" | extend matchscore = name startswith {needle}"
                        f" | extend normalizedName = tolower(tostring(name))"
                        f" | sort by matchscore desc, normalizedName asc"
                        f" | take 30"
                    )
                },
            },
            {
                "url": "/providers/Microsoft.ResourceGraph/resources?api-version=2024-04-01",
                "httpMethod": "POST",
                "name": "resources",
                "requestHeaderDetails": {
                    "commandName": "Microsoft.ResourceGraph",
                },
                "content": {
                    "query": f"Resources | where name contains {needle} | take 30",
                },
            },
        ]
    }
def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the batch response into search results.

    The JSON body is expected to hold a ``responses`` list whose items are
    identified by the ``name`` given in :py:func:`request`
    (``resourceGroups`` or ``resources``); items with any other name are
    ignored.  Every row in an item's ``content.data`` becomes one main result
    linking to the corresponding page of the Azure Portal.

    :param resp: HTTP response of the batch request.
    :return: the collected results.
    """
    res = EngineResults()
    portal = "https://portal.azure.com/#@/resource"

    for result in resp.json()["responses"]:
        if result["name"] == "resourceGroups":
            for data in result["content"]["data"]:
                res.add(
                    res.types.MainResult(
                        url=(
                            f"{portal}"
                            f"/subscriptions/{data['subscriptionId']}/resourceGroups/{data['name']}/overview"
                        ),
                        title=data["name"],
                        content=f"Resource Group in Subscription: {data['subscriptionId']}",
                    )
                )

        elif result["name"] == "resources":
            for data in result["content"]["data"]:
                # Prefer the row's ``id`` (the full resource ID): for nested
                # resources the path cannot be rebuilt from ``type`` and
                # ``name`` alone, because the parent resource's name is
                # missing.  Fall back to the rebuilt path if ``id`` is absent.
                resource_id = data.get("id") or (
                    f"/subscriptions/{data['subscriptionId']}/resourceGroups/{data['resourceGroup']}"
                    f"/providers/{data['type']}/{data['name']}"
                )
                res.add(
                    res.types.MainResult(
                        url=f"{portal}{resource_id}/overview",
                        title=data["name"],
                        content=(
                            f"Resource of type {data['type']} in Subscription:"
                            f" {data['subscriptionId']}, Resource Group: {data['resourceGroup']}"
                        ),
                    )
                )
    return res

View File

@ -495,6 +495,15 @@ engines:
shortcut: ask
disabled: true
# - name: azure
# engine: azure
# shortcut: az
# categories: [it, cloud]
# azure_tenant_id: "your_tenant_id"
# azure_client_id: "your_client_id"
# azure_client_secret: "your_client_secret"
# disabled: true
# tmp suspended: dh key too small
# - name: base
# engine: base