[fix] brave engines - web, images & videos (#5478)

brave web:

    The XPath selectors needed to be adjusted.

brave images & videos:

    The JS object embedded in the page's script code was read incorrectly; not
    always, but often enough that building the Python data structure from it
    raised exceptions.

BTW: a complete review was conducted, and the type definitions were corrected
or extended where needed.

To test all brave engines at once:

    !br !brimg !brvid !brnews weather

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent 54a97e1043
commit ebb9ea4571
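For context on the images & videos fix: the engine previously evaluated the
embedded JS object directly (js_obj_str_to_python over an extr() slice); it now
isolates the object from the page's <script> tag, converts it to strict JSON
with js_obj_str_to_json_str, and parses it with json.loads. A condensed sketch
of the new extraction path, simplified from the diff below (the guard clause
and the commented-out workarounds are omitted):

    import json
    import typing as t

    from searx.utils import js_obj_str_to_json_str

    def extract_json_data(text: str) -> dict[str, t.Any]:
        # Brave inlines its result data in a kit.start(...) call inside a
        # <script> tag; the part we need is the "data: [{...}}]" slice.
        text = text[text.index("<script") : text.index("</script")]
        start = text.index("data: [{")
        end = text.rindex("}}]")
        # wrap the slice so it forms one complete JS object: {data: [{...}}]}
        js_obj_str = "{" + text[start:end] + "}}]}"
        # JS object literal -> strict JSON -> Python; json.loads() fails
        # loudly instead of silently building a wrong data structure
        return json.loads(js_obj_str_to_json_str(js_obj_str))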
searx/engines/brave.py

@@ -124,17 +124,17 @@ from urllib.parse import (
     urlparse,
 )
 
+import json
 from dateutil import parser
 from lxml import html
 
 from searx import locales
 from searx.utils import (
-    extr,
     extract_text,
-    eval_xpath,
     eval_xpath_list,
     eval_xpath_getindex,
     js_obj_str_to_python,
+    js_obj_str_to_json_str,
     get_embeded_stream_url,
 )
 from searx.enginelib.traits import EngineTraits
@@ -142,17 +142,17 @@ from searx.result_types import EngineResults
 from searx.extended_types import SXNG_Response
 
 about = {
-    "website": 'https://search.brave.com/',
-    "wikidata_id": 'Q22906900',
+    "website": "https://search.brave.com/",
+    "wikidata_id": "Q22906900",
     "official_api_documentation": None,
     "use_official_api": False,
     "require_api_key": False,
-    "results": 'HTML',
+    "results": "HTML",
 }
 
 base_url = "https://search.brave.com/"
 categories = []
-brave_category: t.Literal["search", "videos", "images", "news", "goggles"] = 'search'
+brave_category: t.Literal["search", "videos", "images", "news", "goggles"] = "search"
 """Brave supports common web-search, videos, images, news, and goggles search.
 
 - ``search``: Common WEB search
@@ -182,71 +182,86 @@ to do more won't return any result and you will most likely be flagged as a bot.
 """
 
 safesearch = True
-safesearch_map = {2: 'strict', 1: 'moderate', 0: 'off'}  # cookie: safesearch=off
+safesearch_map = {2: "strict", 1: "moderate", 0: "off"}  # cookie: safesearch=off
 
 time_range_support = False
 """Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
 category All) and in the goggles category."""
 
 time_range_map: dict[str, str] = {
-    'day': 'pd',
-    'week': 'pw',
-    'month': 'pm',
-    'year': 'py',
+    "day": "pd",
+    "week": "pw",
+    "month": "pm",
+    "year": "py",
 }
 
 
 def request(query: str, params: dict[str, t.Any]) -> None:
 
     args: dict[str, t.Any] = {
-        'q': query,
-        'source': 'web',
+        "q": query,
+        "source": "web",
     }
     if brave_spellcheck:
-        args['spellcheck'] = '1'
+        args["spellcheck"] = "1"
 
-    if brave_category in ('search', 'goggles'):
-        if params.get('pageno', 1) - 1:
-            args['offset'] = params.get('pageno', 1) - 1
-        if time_range_map.get(params['time_range']):
-            args['tf'] = time_range_map.get(params['time_range'])
+    if brave_category in ("search", "goggles"):
+        if params.get("pageno", 1) - 1:
+            args["offset"] = params.get("pageno", 1) - 1
+        if time_range_map.get(params["time_range"]):
+            args["tf"] = time_range_map.get(params["time_range"])
 
-    if brave_category == 'goggles':
-        args['goggles_id'] = Goggles
+    if brave_category == "goggles":
+        args["goggles_id"] = Goggles
 
     params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"
+    logger.debug("url %s", params["url"])
 
     # set properties in the cookies
 
-    params['cookies']['safesearch'] = safesearch_map.get(params['safesearch'], 'off')
-    # the useLocation is IP based, we use cookie 'country' for the region
-    params['cookies']['useLocation'] = '0'
-    params['cookies']['summarizer'] = '0'
+    params["cookies"]["safesearch"] = safesearch_map.get(params["safesearch"], "off")
+    # the useLocation is IP based, we use cookie "country" for the region
+    params["cookies"]["useLocation"] = "0"
+    params["cookies"]["summarizer"] = "0"
 
-    engine_region = traits.get_region(params['searxng_locale'], 'all')
-    params['cookies']['country'] = engine_region.split('-')[-1].lower()  # type: ignore
+    engine_region = traits.get_region(params["searxng_locale"], "all")
+    params["cookies"]["country"] = engine_region.split("-")[-1].lower()  # type: ignore
 
-    ui_lang = locales.get_engine_locale(params['searxng_locale'], traits.custom["ui_lang"], 'en-us')
-    params['cookies']['ui_lang'] = ui_lang
-
-    logger.debug("cookies %s", params['cookies'])
-
-    params['headers']['Sec-Fetch-Dest'] = "document"
-    params['headers']['Sec-Fetch-Mode'] = "navigate"
-    params['headers']['Sec-Fetch-Site'] = "same-origin"
-    params['headers']['Sec-Fetch-User'] = "?1"
+    ui_lang = locales.get_engine_locale(params["searxng_locale"], traits.custom["ui_lang"], "en-us")
+    params["cookies"]["ui_lang"] = ui_lang
+    logger.debug("cookies %s", params["cookies"])
 
 
-def _extract_published_date(published_date_raw):
+def _extract_published_date(published_date_raw: str | None):
     if published_date_raw is None:
         return None
 
     try:
         return parser.parse(published_date_raw)
     except parser.ParserError:
         return None
 
 
+def extract_json_data(text: str) -> dict[str, t.Any]:
+    # Example script source containing the data:
+    #
+    # kit.start(app, element, {
+    #     node_ids: [0, 19],
+    #     data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
+    #           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    text = text[text.index("<script") : text.index("</script")]
+    if not text:
+        raise ValueError("can't find JS/JSON data in the given text")
+    start = text.index("data: [{")
+    end = text.rindex("}}]")
+    js_obj_str = text[start:end]
+    js_obj_str = "{" + js_obj_str + "}}]}"
+    # js_obj_str = js_obj_str.replace("\xa0", "")  # remove ASCII for
+    # js_obj_str = js_obj_str.replace(r"\u003C", "<").replace(r"\u003c", "<")  # fix broken HTML tags in strings
+    json_str = js_obj_str_to_json_str(js_obj_str)
+    data: dict[str, t.Any] = json.loads(json_str)
+    return data
+
+
 def response(resp: SXNG_Response) -> EngineResults:
 
     if brave_category in ('search', 'goggles'):
@@ -261,11 +276,8 @@ def response(resp: SXNG_Response) -> EngineResults:
     #     node_ids: [0, 19],
     #     data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
     #           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    js_object = "[{" + extr(resp.text, "data: [{", "}}],") + "}}]"
-    json_data = js_obj_str_to_python(js_object)
-
-    # json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
-    json_resp = json_data[1]['data']['body']['response']
+    json_data: dict[str, t.Any] = extract_json_data(resp.text)
+    json_resp: dict[str, t.Any] = json_data['data'][1]["data"]['body']['response']
 
     if brave_category == 'images':
         return _parse_images(json_resp)
@@ -275,150 +287,121 @@ def response(resp: SXNG_Response) -> EngineResults:
     raise ValueError(f"Unsupported brave category: {brave_category}")
 
 
-def _parse_search(resp) -> EngineResults:
-    result_list = EngineResults()
+def _parse_search(resp: SXNG_Response) -> EngineResults:
+    res = EngineResults()
 
     dom = html.fromstring(resp.text)
 
-    # I doubt that Brave is still providing the "answer" class / I haven't seen
-    # answers in brave for a long time.
-    answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
-    if answer_tag:
-        url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None)
-        answer = extract_text(answer_tag)
-        if answer is not None:
-            result_list.add(result_list.types.Answer(answer=answer, url=url))
-
-    # xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
-    xpath_results = '//div[contains(@class, "snippet ")]'
-
-    for result in eval_xpath_list(dom, xpath_results):
-
-        url = eval_xpath_getindex(result, './/a[contains(@class, "h")]/@href', 0, default=None)
-        title_tag = eval_xpath_getindex(
-            result, './/a[contains(@class, "h")]//div[contains(@class, "title")]', 0, default=None
-        )
+    for result in eval_xpath_list(dom, "//div[contains(@class, 'snippet ')]"):
+
+        url: str | None = eval_xpath_getindex(result, ".//a/@href", 0, default=None)
+        title_tag = eval_xpath_getindex(result, ".//div[contains(@class, 'title')]", 0, default=None)
         if url is None or title_tag is None or not urlparse(url).netloc:  # partial url likely means it's an ad
             continue
 
-        content: str = extract_text(
-            eval_xpath_getindex(result, './/div[contains(@class, "snippet-description")]', 0, default='')
-        )  # type: ignore
-        pub_date_raw = eval_xpath(result, 'substring-before(.//div[contains(@class, "snippet-description")], "-")')
-        pub_date = _extract_published_date(pub_date_raw)
-        if pub_date and content.startswith(pub_date_raw):
-            content = content.lstrip(pub_date_raw).strip("- \n\t")
-
-        thumbnail = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
+        content: str = ""
+        pub_date = None
+
+        _content = eval_xpath_getindex(result, ".//div[contains(@class, 'content')]", 0, default="")
+        if len(_content):
+            content = extract_text(_content)  # type: ignore
+            _pub_date = extract_text(
+                eval_xpath_getindex(_content, ".//span[contains(@class, 't-secondary')]", 0, default="")
+            )
+            if _pub_date:
+                pub_date = _extract_published_date(_pub_date)
+                content = content.lstrip(_pub_date).strip("- \n\t")
+
+        thumbnail: str = eval_xpath_getindex(result, ".//a[contains(@class, 'thumbnail')]//img/@src", 0, default="")
 
-        item = {
-            'url': url,
-            'title': extract_text(title_tag),
-            'content': content,
-            'publishedDate': pub_date,
-            'thumbnail': thumbnail,
-        }
+        item = res.types.LegacyResult(
+            template="default.html",
+            url=url,
+            title=extract_text(title_tag),
+            content=content,
+            publishedDate=pub_date,
+            thumbnail=thumbnail,
+        )
+        res.add(item)
 
         video_tag = eval_xpath_getindex(
-            result, './/div[contains(@class, "video-snippet") and @data-macro="video"]', 0, default=None
+            result, ".//div[contains(@class, 'video-snippet') and @data-macro='video']", 0, default=[]
         )
-        if video_tag is not None:
+        if len(video_tag):
 
             # In my tests a video tag in the WEB search was most often not a
             # video, except the ones from youtube ..
 
             iframe_src = get_embeded_stream_url(url)
             if iframe_src:
-                item['iframe_src'] = iframe_src
-                item['template'] = 'videos.html'
-                item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
-                pub_date_raw = extract_text(
-                    eval_xpath(video_tag, './/div[contains(@class, "snippet-attributes")]/div/text()')
-                )
-                item['publishedDate'] = _extract_published_date(pub_date_raw)
-            else:
-                item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
-
-        result_list.append(item)
-
-    return result_list
+                item["iframe_src"] = iframe_src
+                item["template"] = "videos.html"
+
+    return res
 
 
-def _parse_news(resp) -> EngineResults:
-
-    result_list = EngineResults()
+def _parse_news(resp: SXNG_Response) -> EngineResults:
+    res = EngineResults()
 
     dom = html.fromstring(resp.text)
 
-    for result in eval_xpath_list(dom, '//div[contains(@class, "results")]//div[@data-type="news"]'):
-
-        # import pdb
-        # pdb.set_trace()
-
-        url = eval_xpath_getindex(result, './/a[contains(@class, "result-header")]/@href', 0, default=None)
+    for result in eval_xpath_list(dom, "//div[contains(@class, 'results')]//div[@data-type='news']"):
+
+        url = eval_xpath_getindex(result, ".//a[contains(@class, 'result-header')]/@href", 0, default=None)
         if url is None:
             continue
 
-        title = extract_text(eval_xpath_list(result, './/span[contains(@class, "snippet-title")]'))
-        content = extract_text(eval_xpath_list(result, './/p[contains(@class, "desc")]'))
-        thumbnail = eval_xpath_getindex(result, './/div[contains(@class, "image-wrapper")]//img/@src', 0, default='')
+        title = eval_xpath_list(result, ".//span[contains(@class, 'snippet-title')]")
+        content = eval_xpath_list(result, ".//p[contains(@class, 'desc')]")
+        thumbnail = eval_xpath_getindex(result, ".//div[contains(@class, 'image-wrapper')]//img/@src", 0, default="")
 
-        item = {
-            "url": url,
-            "title": title,
-            "content": content,
-            "thumbnail": thumbnail,
-        }
-
-        result_list.append(item)
-
-    return result_list
+        item = res.types.LegacyResult(
+            template="default.html",
+            url=url,
+            title=extract_text(title),
+            thumbnail=thumbnail,
+            content=extract_text(content),
+        )
+        res.add(item)
+
+    return res
 
 
-def _parse_images(json_resp) -> EngineResults:
-    result_list = EngineResults()
+def _parse_images(json_resp: dict[str, t.Any]) -> EngineResults:
+    res = EngineResults()
 
     for result in json_resp["results"]:
-        item = {
-            'url': result['url'],
-            'title': result['title'],
-            'content': result['description'],
-            'template': 'images.html',
-            'resolution': result['properties']['format'],
-            'source': result['source'],
-            'img_src': result['properties']['url'],
-            'thumbnail_src': result['thumbnail']['src'],
-        }
-        result_list.append(item)
-
-    return result_list
+        item = res.types.LegacyResult(
+            template="images.html",
+            url=result["url"],
+            title=result["title"],
+            source=result["source"],
+            img_src=result["properties"]["url"],
+            thumbnail_src=result["thumbnail"]["src"],
+        )
+        res.add(item)
+
+    return res
 
 
-def _parse_videos(json_resp) -> EngineResults:
-    result_list = EngineResults()
+def _parse_videos(json_resp: dict[str, t.Any]) -> EngineResults:
+    res = EngineResults()
 
     for result in json_resp["results"]:
 
-        url = result['url']
-        item = {
-            'url': url,
-            'title': result['title'],
-            'content': result['description'],
-            'template': 'videos.html',
-            'length': result['video']['duration'],
-            'duration': result['video']['duration'],
-            'publishedDate': _extract_published_date(result['age']),
-        }
-
-        if result['thumbnail'] is not None:
-            item['thumbnail'] = result['thumbnail']['src']
-
-        iframe_src = get_embeded_stream_url(url)
+        item = res.types.LegacyResult(
+            template="videos.html",
+            url=result["url"],
+            title=result["title"],
+            content=result["description"],
+            length=result["video"]["duration"],
+            duration=result["video"]["duration"],
+            publishedDate=_extract_published_date(result["age"]),
+        )
+        if result["thumbnail"] is not None:
+            item["thumbnail"] = result["thumbnail"]["src"]
+        iframe_src = get_embeded_stream_url(result["url"])
         if iframe_src:
-            item['iframe_src'] = iframe_src
+            item["iframe_src"] = iframe_src
 
-        result_list.append(item)
+        res.add(item)
 
-    return result_list
+    return res
 
 
 def fetch_traits(engine_traits: EngineTraits):
@@ -443,21 +426,21 @@ def fetch_traits(engine_traits: EngineTraits):
         print("ERROR: response from Brave is not OK.")
     dom = html.fromstring(resp.text)
 
-    for option in dom.xpath('//section//option[@value="en-us"]/../option'):
+    for option in dom.xpath("//section//option[@value='en-us']/../option"):
 
-        ui_lang = option.get('value')
+        ui_lang = option.get("value")
         try:
-            l = babel.Locale.parse(ui_lang, sep='-')
+            l = babel.Locale.parse(ui_lang, sep="-")
             if l.territory:
-                sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
+                sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep="-"))
             else:
-                sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-'))
+                sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep="-"))
 
         except babel.UnknownLocaleError:
             print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
             continue
 
-        conflict = engine_traits.custom["ui_lang"].get(sxng_tag)
+        conflict = engine_traits.custom["ui_lang"].get(sxng_tag)  # type: ignore
         if conflict:
             if conflict != ui_lang:
                 print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, ui_lang))
@@ -466,26 +449,26 @@ def fetch_traits(engine_traits: EngineTraits):
 
     # search regions of brave
 
-    resp = get('https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js')
+    resp = get("https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js")
 
     if not resp.ok:
         print("ERROR: response from Brave is not OK.")
 
-    country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
+    country_js = resp.text[resp.text.index("options:{all") + len("options:") :]
     country_js = country_js[: country_js.index("},k={default")]
     country_tags = js_obj_str_to_python(country_js)
 
     for k, v in country_tags.items():
-        if k == 'all':
-            engine_traits.all_locale = 'all'
+        if k == "all":
+            engine_traits.all_locale = "all"
             continue
-        country_tag = v['value']
+        country_tag = v["value"]
 
         # add official languages of the country ..
         for lang_tag in babel.languages.get_official_languages(country_tag, de_facto=True):
             lang_tag = lang_map.get(lang_tag, lang_tag)
-            sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (lang_tag, country_tag.upper())))
-            # print("%-20s: %s <-- %s" % (v['label'], country_tag, sxng_tag))
+            sxng_tag = region_tag(babel.Locale.parse("%s_%s" % (lang_tag, country_tag.upper())))
+            # print("%-20s: %s <-- %s" % (v["label"], country_tag, sxng_tag))
 
             conflict = engine_traits.regions.get(sxng_tag)
             if conflict:
searx/search/processors/online.py

@@ -155,8 +155,15 @@ class OnlineProcessor(EngineProcessor):
                 search_query.locale.language,
             )
             headers["Accept-Language"] = ac_lang
 
         self.logger.debug("HTTP Accept-Language: %s", headers.get("Accept-Language", ""))
 
+        # https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header
+        headers["Sec-Fetch-Dest"] = "empty"
+        headers["Sec-Fetch-Mode"] = "cors"
+        headers["Sec-Fetch-Site"] = "same-origin"
+        headers["Sec-Fetch-User"] = "?1"
+        headers["Sec-GPC"] = "1"
+
         return params
 
     def _send_http_request(self, params: OnlineParams):