From 7a1b959646c45a81d3495148b1fa6c2da585eb59 Mon Sep 17 00:00:00 2001 From: Austin-Olacsi <138650713+Austin-Olacsi@users.noreply.github.com> Date: Mon, 10 Nov 2025 09:27:00 -0700 Subject: [PATCH] [fix] hackernews contains HTML escape codes --- searx/engines/hackernews.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/searx/engines/hackernews.py b/searx/engines/hackernews.py index 3b0dd2d87..8ee20f7d2 100644 --- a/searx/engines/hackernews.py +++ b/searx/engines/hackernews.py @@ -6,6 +6,7 @@ from urllib.parse import urlencode from dateutil.relativedelta import relativedelta from flask_babel import gettext +from searx.utils import html_to_text # Engine metadata about = { @@ -75,6 +76,7 @@ def response(resp): object_id = hit["objectID"] points = hit.get("points") or 0 num_comments = hit.get("num_comments") or 0 + content = hit.get("url") or html_to_text(hit.get("comment_text")) or html_to_text(hit.get("story_text")) metadata = "" if points != 0 or num_comments != 0: @@ -83,7 +85,7 @@ def response(resp): { "title": hit.get("title") or f"{gettext('author')}: {hit['author']}", "url": f"https://news.ycombinator.com/item?id={object_id}", - "content": hit.get("url") or hit.get("comment_text") or hit.get("story_text") or "", + "content": content, "metadata": metadata, "author": hit["author"], "publishedDate": datetime.fromtimestamp(hit["created_at_i"]),