mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	Merge pull request #1199 from kvch/fix-microsoft-academic
Fix Microsoft Academic engine
This commit is contained in:
		
						commit
						f5be8206c8
					
				
							
								
								
									
										75
									
								
								searx/engines/microsoft_academic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								searx/engines/microsoft_academic.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,75 @@
 | 
				
			|||||||
 | 
					"""
 | 
				
			||||||
 | 
					Microsoft Academic (Science)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@website     https://academic.microsoft.com
 | 
				
			||||||
 | 
					@provide-api yes
 | 
				
			||||||
 | 
					@using-api   no
 | 
				
			||||||
 | 
					@results     JSON
 | 
				
			||||||
 | 
					@stable      no
 | 
				
			||||||
 | 
					@parse       url, title, content
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from json import loads
 | 
				
			||||||
 | 
					from uuid import uuid4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
 | 
					from searx.utils import html_to_text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Engine defaults. Microsoft Academic is a scholarly search service, so the
					# default category is 'science' (matching the engine's settings entry),
					# not 'images'.
					categories = ['science']
					# The engine supports fetching further result pages.
					paging = True
					# Search endpoint; the ``{query}`` placeholder is replaced by ``request``
					# with a URL-encoded query string.
					result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def request(query, params):
					    """Fill ``params`` with the POST request for one page of results.

					    Args:
					        query: Search terms; sent wrapped in ``@`` characters.
					        params: Request description dict. ``params['pageno']`` is read
					            (the ``- 1`` below implies the first page is numbered 1) and
					            ``params['cookies']`` must already be a dict.

					    Returns:
					        The same ``params`` dict with ``url``, ``method``, ``data`` and
					        the ``msacademic`` / ``ai_user`` cookies set.
					    """
					    # Number of results requested per page; also the paging step.
					    page_size = 10

					    # Fresh random identifiers for every request.
					    # NOTE(review): presumably these mimic what the website's own
					    # frontend sends -- confirm against a browser session.
					    correlation_id = uuid4()
					    msacademic = uuid4()
					    time_now = datetime.now()

					    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
					    params['cookies']['msacademic'] = str(msacademic)
					    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
					    params['method'] = 'POST'
					    params['data'] = {
					        'Query': '@{query}@'.format(query=query),
					        'Limit': page_size,
					        # Offset is treated as a result index, so advance by a whole page
					        # per page number. The previous ``pageno - 1`` moved forward by a
					        # single result, making consecutive pages overlap.
					        'Offset': (params['pageno'] - 1) * page_size,
					        'Filters': '',
					        'OrderBy': '',
					        'SortAscending': False,
					    }

					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def response(resp):
					    """Convert the JSON body of ``resp`` into a list of result dicts.

					    Every entry of the top-level ``results`` array becomes a dict with
					    the keys ``url``, ``title`` and ``content``; title and content are
					    passed through ``html_to_text``.
					    """
					    payload = loads(resp.text)

					    return [
					        {
					            'url': _get_url(entry),
					            # 'dn' holds the entry's title.
					            'title': html_to_text(entry['e']['dn']),
					            'content': html_to_text(_get_content(entry)),
					        }
					        for entry in payload['results']
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_url(result):
 | 
				
			||||||
 | 
					    if 's' in result['e']:
 | 
				
			||||||
 | 
					        return result['e']['s'][0]['u']
 | 
				
			||||||
 | 
					    return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_content(result):
 | 
				
			||||||
 | 
					    if 'd' in result['e']:
 | 
				
			||||||
 | 
					        content = result['e']['d']
 | 
				
			||||||
 | 
					        if len(content) > 300:
 | 
				
			||||||
 | 
					            return content[:300] + '...'
 | 
				
			||||||
 | 
					        return content
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return ''
 | 
				
			||||||
@ -398,15 +398,7 @@ engines:
 | 
				
			|||||||
    shortcut : lo
 | 
					    shortcut : lo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  - name : microsoft academic
 | 
					  - name : microsoft academic
 | 
				
			||||||
    engine : json_engine
 | 
					    engine : microsoft_academic
 | 
				
			||||||
    paging : True
 | 
					 | 
				
			||||||
    search_url : https://academic.microsoft.com/api/search/GetEntityResults?query=%40{query}%40&filters=&offset={pageno}&limit=8&correlationId=undefined
 | 
					 | 
				
			||||||
    results_query : results
 | 
					 | 
				
			||||||
    url_query : u
 | 
					 | 
				
			||||||
    title_query : dn
 | 
					 | 
				
			||||||
    content_query : d
 | 
					 | 
				
			||||||
    page_size : 8
 | 
					 | 
				
			||||||
    first_page_num : 0
 | 
					 | 
				
			||||||
    categories : science
 | 
					    categories : science
 | 
				
			||||||
    shortcut : ma
 | 
					    shortcut : ma
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user