mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	Closes: https://github.com/searxng/searxng/issues/4226 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
		
			
				
	
	
		
			147 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			147 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
						|
"""SoundCloud is a German audio streaming service."""
 | 
						|
 | 
						|
import re
 | 
						|
from urllib.parse import quote_plus, urlencode
 | 
						|
import datetime
 | 
						|
 | 
						|
from dateutil import parser
 | 
						|
from lxml import html
 | 
						|
 | 
						|
from searx.network import get as http_get
 | 
						|
 | 
						|
# Engine metadata.  The engine does not use the official API and needs no API
# key (see the flags below); results are delivered as JSON.
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}
 | 
						|
 | 
						|
categories = ["music"]
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) URL, it is the API which is used by the
HTML frontend of the common WEB site.
"""

# Captures the value of a ``client_id:"..."`` assignment, ignoring case.
# ``str`` patterns are Unicode-aware by default, so ``re.U`` is not needed.
cid_re = re.compile(r'client_id:"([^"]*)"', re.I)

# Set by ``init()``; sent as the ``client_id`` argument of every search request.
guest_client_id = ""

# Page size: used as ``limit`` and to compute the ``offset`` of a page.
results_per_page = 10

# Value of the ``facet`` argument of the search request.
soundcloud_facet = "model"

# Maps the language part of the request's locale to the ``app_locale``
# argument; ``request()`` falls back to "en" for languages not listed here.
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}
 | 
						|
 | 
						|
 | 
						|
def request(query, params):
    """Assemble the search URL for ``query`` and store it in ``params['url']``.

    ``params['pageno']`` is turned into an item offset and the language part
    of ``params['language']`` (the text before the first ``-``) is mapped to
    an ``app_locale``, defaulting to ``en``.  The updated ``params`` dict is
    returned.
    """

    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    first_item = (params['pageno'] - 1) * results_per_page
    lang = params["language"].split("-")[0]

    query_string = urlencode(
        {
            "q": query,
            "offset": first_item,
            "limit": results_per_page,
            "facet": soundcloud_facet,
            "client_id": guest_client_id,
            "app_locale": app_locale_map.get(lang, "en"),
        }
    )

    params['url'] = search_url + "?" + query_string
    return params
 | 
						|
 | 
						|
 | 
						|
def response(resp):
    """Convert the JSON body of a search response into a list of result dicts.

    Only items of kind ``track`` or ``playlist`` that have a ``permalink_url``
    are returned; everything else in ``collection`` is skipped.  ``title`` and
    ``last_modified`` are required on such items, all other fields are
    optional:

    - ``iframe_src`` is only set when the item has a ``uri``
    - ``length`` is only set when ``duration`` (divided by 1000 to get
      seconds) is at least one second
    - ``thumbnail``, ``views`` and ``author`` are ``None`` when the item has
      no usable value
    """
    results = []
    data = resp.json()

    # "collection" may be missing or null
    for result in data.get("collection") or []:

        # items without a "kind" are skipped instead of raising a KeyError
        if result.get("kind") not in ("track", "playlist"):
            continue

        url = result.get("permalink_url")
        if not url:
            continue

        content = [
            result.get("description"),
            result.get("label_name"),
        ]
        # "user" may be missing or null
        user = result.get("user") or {}

        res = {
            "url": url,
            "title": result["title"],
            "content": " / ".join([c for c in content if c]),
            "publishedDate": parser.parse(result["last_modified"]),
            # prefer the item's artwork, fall back to the uploader's avatar
            "thumbnail": result.get("artwork_url") or user.get("avatar_url") or None,
            "views": result.get("playback_count", 0) or None,
            "author": user.get("full_name") or None,
        }

        # without a uri there is nothing the embedded player could load
        uri = result.get("uri")
        if uri:
            res["iframe_src"] = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

        # "duration" may be missing or null
        length = int((result.get("duration") or 0) / 1000)
        if length:
            res["length"] = datetime.timedelta(seconds=length)

        results.append(res)

    return results
 | 
						|
 | 
						|
 | 
						|
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Initialise the engine: look up a client id via ``get_client_id()`` and
    store it in the module global ``guest_client_id``.

    ``engine_settings`` is accepted but not used.
    """
    global guest_client_id  # pylint: disable=global-statement

    fetched_id = get_client_id()
    guest_client_id = fetched_id
 | 
						|
 | 
						|
 | 
						|
def get_client_id() -> str:
    """Scrape a ``client_id`` from the JavaScript assets of soundcloud.com.

    The start page is fetched and every ``<script>`` whose ``src`` contains
    ``/assets/`` is downloaded, last one first, until one of them contains a
    non-empty ``client_id:"..."`` value (see ``cid_re``).

    Returns the client id, or an empty string when the start page could not
    be fetched or none of the scripts contains a usable id.
    """

    client_id = ""
    url = "https://soundcloud.com"
    resp = http_get(url, timeout=10)

    if not resp.ok:
        logger.error("init: GET %s failed", url)
        return client_id

    tree = html.fromstring(resp.content)
    # the XPath only selects <script> elements that have a matching src
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags]

    # walk the scripts in reverse document order
    for app_js_url in reversed(app_js_urls):

        # gets app_js and search for the client_id
        resp = http_get(app_js_url)

        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        match = cid_re.search(resp.content.decode())
        # an empty value (client_id:"") is useless: keep looking in the
        # remaining scripts instead of stopping with an empty id
        if match and match.group(1):
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundclud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundclud queries")
    return client_id
 |