mirror of
https://github.com/searxng/searxng.git
synced 2025-11-21 22:13:18 -05:00
The requests changed here all run outside of the network context timeout, thereby preventing the engine's timeout from being applied (the engine's timeout can become longer than it was configured). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
158 lines
4.4 KiB
Python
158 lines
4.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""SoundCloud is a German audio streaming service."""
|
|
|
|
import re
|
|
import datetime
|
|
|
|
from urllib.parse import quote_plus, urlencode
|
|
|
|
from dateutil import parser
|
|
from lxml import html
|
|
|
|
from searx.network import get as http_get
|
|
from searx.enginelib import EngineCache
|
|
|
|
# Descriptive metadata about the SoundCloud service and about how this engine
# accesses it (no official API, no API key, JSON replies).
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}
|
|
|
|
# Engine traits: result category and paging support.
categories = ["music"]
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""Search endpoint of the API that SoundCloud's own HTML frontend talks to.
This is *not* the official (developer) API URL.
"""

# Captures the value of a ``client_id:"..."`` assignment (case-insensitive).
cid_re = re.compile(r'client_id:"([^"]*)"', re.IGNORECASE | re.UNICODE)

# Number of results requested per page; also the step of the paging offset.
results_per_page = 10

# Value sent as the ``facet`` query argument of a search.
soundcloud_facet = "model"
|
|
|
|
# Maps a language code (the part of the requested language before the first
# "-") to the value sent as ``app_locale``.  Some languages are mapped to the
# locale of a different language (oc -> fr, szl -> pl, pap -> pt_BR).
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}
|
|
|
|
CACHE: EngineCache
"""Persistent (SQLite) key/value cache whose values are deleted once
``expire`` seconds have passed.  Only declared here; the instance is created
in :py:func:`init`."""
|
|
|
|
|
|
def request(query, params):
    """Build the URL of a SoundCloud search request.

    The guest ``client_id`` is read from :py:obj:`CACHE`; on a cache miss it
    is fetched with :py:func:`get_client_id` and, if one was found, stored in
    the cache for later requests.

    :param query: search terms, sent as the ``q`` argument.
    :param params: request parameters; ``params['pageno']`` (1-based page
        number) and ``params['language']`` are read, ``params['url']`` is set.
    :return: the ``params`` dict that was passed in.
    """

    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    guest_client_id = CACHE.get("guest_client_id")
    if guest_client_id is None:
        guest_client_id = get_client_id()
        if guest_client_id:
            CACHE.set(key="guest_client_id", value=guest_client_id)

    args = {
        "q": query,
        "offset": (params["pageno"] - 1) * results_per_page,
        "limit": results_per_page,
        "facet": soundcloud_facet,
        "client_id": guest_client_id,
        # only the primary language subtag is mapped; unknown ones use "en"
        "app_locale": app_locale_map.get(params["language"].split("-")[0], "en"),
    }

    if not guest_client_id:
        # urlencode() stringifies its values, a missing client id would be
        # sent as the literal text "client_id=None" -- leave the argument out.
        del args["client_id"]

    params["url"] = f"{search_url}?{urlencode(args)}"
    return params
|
|
|
|
|
|
def response(resp):
    """Convert the JSON reply of a search request into a list of results.

    Only items of kind ``track`` or ``playlist`` that have a
    ``permalink_url`` become results, all other items are skipped.

    :param resp: response object; only its ``json()`` method is used.
    :return: list of result dicts with the keys ``url``, ``title``,
        ``content``, ``publishedDate``, ``thumbnail``, ``views`` and
        ``author`` plus, when the data is available, ``iframe_src`` and
        ``length``.
    """
    results = []
    data = resp.json()

    for result in data.get("collection", []):

        # .get(): an item without a "kind" is skipped instead of raising a
        # KeyError that would discard the results of the whole page
        if result.get("kind") not in ("track", "playlist"):
            continue

        url = result.get("permalink_url")
        if not url:
            continue

        # "user" can be missing or null
        user = result.get("user") or {}
        content = [
            result.get("description"),
            result.get("label_name"),
        ]
        res = {
            "url": url,
            "title": result["title"],
            "content": " / ".join(c for c in content if c),
            "publishedDate": parser.parse(result["last_modified"]),
            # fall back to the uploader's avatar when there is no artwork
            "thumbnail": result.get("artwork_url") or user.get("avatar_url") or None,
            "views": result.get("playback_count") or None,
            "author": user.get("full_name") or None,
        }

        # quote_plus() can't handle None, embed the player only if the item
        # has an API uri
        uri = result.get("uri")
        if uri:
            res["iframe_src"] = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

        # duration is divided by 1000 and used as seconds; it may be missing
        # or null
        length = int((result.get("duration") or 0) / 1000)
        if length:
            res["length"] = datetime.timedelta(seconds=length)

        results.append(res)

    return results
|
|
|
|
|
|
def init(engine_settings):
    """Create the module-level :py:obj:`CACHE`, an ``EngineCache`` instantiated
    with the engine's name taken from ``engine_settings["name"]``."""
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)  # type:ignore
|
|
|
|
|
|
def get_client_id() -> str | None:
    """Scrape a guest ``client_id`` from SoundCloud's web frontend.

    Fetches the start page, collects the ``src`` of all ``<script>`` tags
    whose ``src`` contains ``/assets/`` and downloads these scripts (last one
    first) until one of them matches :py:obj:`cid_re` with a non-empty id.

    :return: the client id, or ``None`` when the start page could not be
        fetched or no script contains a client id.
    """
    url = "https://soundcloud.com"
    resp = http_get(url, timeout=3)

    if not resp.ok:
        logger.error("init: GET %s failed", url)
        # the annotated contract is ``str | None``: don't hand out ""
        return None

    # extracts valid app_js urls from soundcloud.com content
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags]

    client_id = None
    for app_js_url in reversed(app_js_urls):

        # gets app_js and search for the client_id
        resp = http_get(app_js_url)

        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        match = cid_re.search(resp.content.decode())
        # an empty id is of no use, keep looking in the remaining scripts
        if match and match.group(1):
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundclud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundclud queries")
    return client_id
|