Escape html text in result body (#912)

Moved the cleaner functions to app/utils/escaper.py

Removed unused import 're'

Moved the cleaner functionality into "search.py" and "routes.py"

Making sure escaped chars stay escaped during processing

Replaced "<" and ">" with "andlt;" and "andgt;", respectively. This way,
when the 'response' object gets loaded into bsoup (which happens several
times throughout the process between search.py and routes.py), bsoup will
not unescape them.
This commit is contained in:
Ahmad Alkadri 2022-12-29 23:19:28 +01:00 committed by GitHub
parent 08aa1ab8f1
commit 3dda8b25ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 4 deletions

View File

@ -284,7 +284,6 @@ def autocomplete():
g.user_request.autocomplete(q) if not g.user_config.tor else [] g.user_request.autocomplete(q) if not g.user_config.tor else []
]) ])
@app.route(f'/{Endpoint.search}', methods=['GET', 'POST']) @app.route(f'/{Endpoint.search}', methods=['GET', 'POST'])
@session_required @session_required
@auth_required @auth_required
@ -323,6 +322,7 @@ def search():
soup = bsoup(response, "html.parser"); soup = bsoup(response, "html.parser");
for x in soup.find_all(attrs={"id": "st-card"}): for x in soup.find_all(attrs={"id": "st-card"}):
x.replace_with("") x.replace_with("")
response = str(soup) response = str(soup)
# Return 503 if temporarily blocked by captcha # Return 503 if temporarily blocked by captcha
@ -336,6 +336,7 @@ def search():
config=g.user_config, config=g.user_config,
query=urlparse.unquote(query), query=urlparse.unquote(query),
params=g.user_config.to_params(keys=['preferences'])), 503 params=g.user_config.to_params(keys=['preferences'])), 503
response = bold_search_terms(response, query) response = bold_search_terms(response, query)
# Feature to display IP address # Feature to display IP address
@ -358,6 +359,7 @@ def search():
preferences = g.user_config.preferences preferences = g.user_config.preferences
home_url = f"home?preferences={preferences}" if preferences else "home" home_url = f"home?preferences={preferences}" if preferences else "home"
cleanresponse = str(response).replace("andlt;","<").replace("andgt;",">")
return render_template( return render_template(
'display.html', 'display.html',
@ -378,7 +380,7 @@ def search():
is_translation=any( is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate'] _ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only ) and not search_util.search_type, # Standard search queries only
response=response, response=cleanresponse,
version_number=app.config['VERSION_NUMBER'], version_number=app.config['VERSION_NUMBER'],
search_header=render_template( search_header=render_template(
'header.html', 'header.html',

View File

@ -1,7 +1,6 @@
import os import os
import re import re
from typing import Any from typing import Any
from app.filter import Filter from app.filter import Filter
from app.request import gen_query from app.request import gen_query
from app.utils.misc import get_proxy_host_url from app.utils.misc import get_proxy_host_url
@ -142,7 +141,8 @@ class Search:
force_mobile=view_image) force_mobile=view_image)
# Produce cleanable html soup from response # Produce cleanable html soup from response
html_soup = bsoup(get_body.text, 'html.parser') get_body_safed = get_body.text.replace("<","andlt;").replace(">","andgt;")
html_soup = bsoup(get_body_safed, 'html.parser')
# Replace current soup if view_image is active # Replace current soup if view_image is active
if view_image: if view_image: