Dokuwiki searches behind a reverse proxy had a duplicate base path in the URL, creating a wrong URL. This patch replaces string concatenation of URLs with urljoin [1] from urllib.parse. This eliminates the duplication problem while retaining the old functionality of concatenating protocol, hostname and port (as base_url) with the path.

[1] https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin

Closes: https://github.com/searxng/searxng/issues/4598
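For reference, the behavior the patch relies on can be reproduced with the standard library alone; the base_url and href values below are illustrative, not taken from a real deployment:

    from urllib.parse import urljoin

    # With only protocol, hostname and port, urljoin behaves like the old concatenation:
    urljoin('http://localhost:8090', '/doku.php?id=start')
    # -> 'http://localhost:8090/doku.php?id=start'

    # Behind a reverse proxy, base_url carries a path prefix and DokuWiki's result
    # hrefs already include it, so plain string concatenation duplicates the path:
    'https://example.org/wiki' + '/wiki/doku.php?id=start'
    # -> 'https://example.org/wiki/wiki/doku.php?id=start'  (duplicate /wiki)

    # urljoin resolves the absolute path against the host instead:
    urljoin('https://example.org/wiki', '/wiki/doku.php?id=start')
    # -> 'https://example.org/wiki/doku.php?id=start'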
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Doku Wiki
"""

from urllib.parse import urlencode
from urllib.parse import urljoin
from lxml.html import fromstring
from searx.utils import extract_text, eval_xpath

# about
about = {
    "website": 'https://www.dokuwiki.org/',
    "wikidata_id": 'Q851864',
    "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['general']  # 'images', 'music', 'videos', 'files'
paging = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
base_url = 'http://localhost:8090'
search_url = (
    # fmt: off
    '/?do=search'
    '&{query}'
    # fmt: on
)
# '&startRecord={offset}'
# '&maximumRecords={limit}'

# do search-request
def request(query, params):

    params['url'] = base_url + search_url.format(query=urlencode({'id': query}))

    return params

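# For illustration: with the default base_url above, request() produces a URL
# of the form http://localhost:8090/?do=search&id=<urlencoded query>; scheme,
# host and any path prefix come from the configured base_url.
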
# get response from search-request
def response(resp):
    results = []

    doc = fromstring(resp.text)

    # parse results
    # Quickhits: pagename matches, rendered without a content snippet
    for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
        try:
            res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
        except:  # pylint: disable=bare-except
            continue

        if not res_url:
            continue

        title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))

        # append result; urljoin keeps the path part of base_url from being
        # duplicated when res_url is an absolute path (reverse proxy setups)
        results.append({'title': title, 'content': "", 'url': urljoin(base_url, res_url)})

    # Search results: a <dt> carries the link and title of a hit, the
    # following <dd> carries its content snippet
    for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
        try:
            if r.tag == "dt":
                res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
                title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
            elif r.tag == "dd":
                content = extract_text(eval_xpath(r, '.'))

                # append result
                results.append({'title': title, 'content': content, 'url': urljoin(base_url, res_url)})
        except:  # pylint: disable=bare-except
            continue

        if not res_url:
            continue

    # return results
    return results
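As a rough composition sketch (the query value and the empty params dict are illustrative; in SearXNG the framework supplies params and performs the HTTP request):

    params = request('start', {})
    # params['url'] -> 'http://localhost:8090/?do=search&id=start'

    # The framework fetches params['url'] and hands the HTTP response to the
    # engine; response(resp) then parses the HTML into result dicts of the
    # form {'title': ..., 'content': ..., 'url': ...}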