mirror of
https://github.com/searxng/searxng.git
synced 2025-10-23 23:09:02 -04:00
[mod] stackoverflow & yandex: detect CAPTCHA response
This commit is contained in:
parent
7905d41487
commit
fa909c7c02
@ -10,9 +10,10 @@
|
|||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib.parse import urlencode, urljoin
|
from urllib.parse import urlencode, urljoin, urlparse
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import extract_text
|
from searx.utils import extract_text
|
||||||
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
@ -37,6 +38,10 @@ def request(query, params):
|
|||||||
|
|
||||||
# get response from search-request
|
# get response from search-request
|
||||||
def response(resp):
|
def response(resp):
|
||||||
|
resp_url = urlparse(resp.url)
|
||||||
|
if resp_url.path.startswith('/nocaptcha'):
|
||||||
|
raise SearxEngineCaptchaException()
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
|
@ -9,9 +9,10 @@
|
|||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode, urlparse
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
|
|
||||||
logger = logger.getChild('yandex engine')
|
logger = logger.getChild('yandex engine')
|
||||||
|
|
||||||
@ -47,6 +48,10 @@ def request(query, params):
|
|||||||
|
|
||||||
# get response from search-request
|
# get response from search-request
|
||||||
def response(resp):
|
def response(resp):
|
||||||
|
resp_url = urlparse(resp.url)
|
||||||
|
if resp_url.path.startswith('/showcaptcha'):
|
||||||
|
raise SearxEngineCaptchaException()
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user