mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-26 00:02:31 -04:00 
			
		
		
		
	Merge branch 'master' into nyaa
This commit is contained in:
		
						commit
						c3232b0e1a
					
				| @ -1,62 +0,0 @@ | |||||||
| """ |  | ||||||
|  General Files (Files) |  | ||||||
| 
 |  | ||||||
|  @website     http://www.general-files.org |  | ||||||
|  @provide-api no (nothing found) |  | ||||||
| 
 |  | ||||||
|  @using-api   no (because nothing found) |  | ||||||
|  @results     HTML (using search portal) |  | ||||||
|  @stable      no (HTML can change) |  | ||||||
|  @parse       url, title, content |  | ||||||
| 
 |  | ||||||
|  @todo        detect torrents? |  | ||||||
| """ |  | ||||||
| 
 |  | ||||||
| from lxml import html |  | ||||||
| 
 |  | ||||||
| # engine dependent config |  | ||||||
| categories = ['files'] |  | ||||||
| paging = True |  | ||||||
| 
 |  | ||||||
| # search-url |  | ||||||
| base_url = 'http://www.general-file.com' |  | ||||||
| search_url = base_url + '/files-{letter}/{query}/{pageno}' |  | ||||||
| 
 |  | ||||||
| # specific xpath variables |  | ||||||
| result_xpath = '//table[@class="block-file"]' |  | ||||||
| title_xpath = './/h2/a//text()' |  | ||||||
| url_xpath = './/h2/a/@href' |  | ||||||
| content_xpath = './/p//text()' |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # do search-request |  | ||||||
| def request(query, params): |  | ||||||
| 
 |  | ||||||
|     params['url'] = search_url.format(query=query, |  | ||||||
|                                       letter=query[0], |  | ||||||
|                                       pageno=params['pageno']) |  | ||||||
| 
 |  | ||||||
|     return params |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # get response from search-request |  | ||||||
| def response(resp): |  | ||||||
|     results = [] |  | ||||||
| 
 |  | ||||||
|     dom = html.fromstring(resp.text) |  | ||||||
| 
 |  | ||||||
|     # parse results |  | ||||||
|     for result in dom.xpath(result_xpath): |  | ||||||
|         url = result.xpath(url_xpath)[0] |  | ||||||
| 
 |  | ||||||
|         # skip fast download links |  | ||||||
|         if not url.startswith('/'): |  | ||||||
|             continue |  | ||||||
| 
 |  | ||||||
|         # append result |  | ||||||
|         results.append({'url': base_url + url, |  | ||||||
|                         'title': ''.join(result.xpath(title_xpath)), |  | ||||||
|                         'content': ''.join(result.xpath(content_xpath))}) |  | ||||||
| 
 |  | ||||||
|     # return results |  | ||||||
|     return results |  | ||||||
| @ -10,6 +10,7 @@ | |||||||
|  @parse       url, title, content |  @parse       url, title, content | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | import random | ||||||
| from json import loads | from json import loads | ||||||
| from time import time | from time import time | ||||||
| from lxml.html import fromstring | from lxml.html import fromstring | ||||||
| @ -32,7 +33,8 @@ search_string = 'search?{query}'\ | |||||||
|     '&qh=0'\ |     '&qh=0'\ | ||||||
|     '&qlang={lang}'\ |     '&qlang={lang}'\ | ||||||
|     '&ff={safesearch}'\ |     '&ff={safesearch}'\ | ||||||
|     '&rxikd={rxikd}'  # random number - 9 digits |     '&rxieu={rxieu}'\ | ||||||
|  |     '&rand={rxikd}'  # current unix timestamp | ||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = '//response//result' | results_xpath = '//response//result' | ||||||
| @ -59,10 +61,12 @@ def request(query, params): | |||||||
|     else: |     else: | ||||||
|         safesearch = 0 |         safesearch = 0 | ||||||
| 
 | 
 | ||||||
|  |     # rxieu is some kind of hash from the search query, but accepts random atm | ||||||
|     search_path = search_string.format(query=urlencode({'q': query}), |     search_path = search_string.format(query=urlencode({'q': query}), | ||||||
|                                        offset=offset, |                                        offset=offset, | ||||||
|                                        number_of_results=number_of_results, |                                        number_of_results=number_of_results, | ||||||
|                                        rxikd=str(time())[:9], |                                        rxikd=int(time() * 1000), | ||||||
|  |                                        rxieu=random.randint(1000000000, 9999999999), | ||||||
|                                        lang=language, |                                        lang=language, | ||||||
|                                        safesearch=safesearch) |                                        safesearch=safesearch) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -67,8 +67,8 @@ def response(resp): | |||||||
|     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'): |     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'): | ||||||
|         try: |         try: | ||||||
|             r = { |             r = { | ||||||
|                 'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0], |                 'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"), | ||||||
|                 'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')), |                 'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')), | ||||||
|                 'content': ''.join(result.xpath('.//div[@class="st"]//text()')), |                 'content': ''.join(result.xpath('.//div[@class="st"]//text()')), | ||||||
|             } |             } | ||||||
|         except: |         except: | ||||||
|  | |||||||
| @ -242,15 +242,16 @@ engines: | |||||||
|     disabled: True |     disabled: True | ||||||
| 
 | 
 | ||||||
|   - name : gitlab |   - name : gitlab | ||||||
|     engine : xpath |     engine : json_engine | ||||||
|     paging : True |     paging : True | ||||||
|     search_url : https://gitlab.com/search?page={pageno}&search={query} |     search_url : https://gitlab.com/api/v4/projects?search={query}&page={pageno} | ||||||
|     url_xpath : //li[@class="project-row"]//a[@class="project"]/@href |     url_query : web_url | ||||||
|     title_xpath : //li[@class="project-row"]//span[contains(@class, "project-full-name")] |     title_query : name_with_namespace | ||||||
|     content_xpath : //li[@class="project-row"]//div[@class="description"]/p |     content_query : description | ||||||
|  |     page_size : 20 | ||||||
|     categories : it |     categories : it | ||||||
|     shortcut : gl |     shortcut : gl | ||||||
|     timeout : 5.0 |     timeout : 10.0 | ||||||
|     disabled : True |     disabled : True | ||||||
| 
 | 
 | ||||||
|   - name : github |   - name : github | ||||||
| @ -321,9 +322,9 @@ engines: | |||||||
|     engine : xpath |     engine : xpath | ||||||
|     paging : True |     paging : True | ||||||
|     search_url : https://geektimes.ru/search/page{pageno}/?q={query} |     search_url : https://geektimes.ru/search/page{pageno}/?q={query} | ||||||
|     url_xpath : //div[@class="search_results"]//a[@class="post__title_link"]/@href |     url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href | ||||||
|     title_xpath : //div[@class="search_results"]//a[@class="post__title_link"] |     title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"] | ||||||
|     content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] |     content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")] | ||||||
|     categories : it |     categories : it | ||||||
|     timeout : 4.0 |     timeout : 4.0 | ||||||
|     disabled : True |     disabled : True | ||||||
| @ -333,9 +334,9 @@ engines: | |||||||
|     engine : xpath |     engine : xpath | ||||||
|     paging : True |     paging : True | ||||||
|     search_url : https://habrahabr.ru/search/page{pageno}/?q={query} |     search_url : https://habrahabr.ru/search/page{pageno}/?q={query} | ||||||
|     url_xpath : //div[@class="search_results"]//a[contains(@class, "post__title_link")]/@href |     url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href | ||||||
|     title_xpath : //div[@class="search_results"]//a[contains(@class, "post__title_link")] |     title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"] | ||||||
|     content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] |     content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")] | ||||||
|     categories : it |     categories : it | ||||||
|     timeout : 4.0 |     timeout : 4.0 | ||||||
|     disabled : True |     disabled : True | ||||||
|  | |||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user