mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-26 16:22:30 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			152 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | ||
| """CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
 | ||
| database of the world’s scholarly literature, collecting and indexing
 | ||
| research from repositories and journals.
 | ||
| 
 | ||
| .. _CORE: https://core.ac.uk/about
 | ||
| 
 | ||
| .. _core engine config:
 | ||
| 
 | ||
| Configuration
 | ||
| =============
 | ||
| 
 | ||
| The engine has the following additional settings:
 | ||
| 
 | ||
| - :py:obj:`api_key`
 | ||
| 
 | ||
| .. code:: yaml
 | ||
| 
 | ||
|   - name: core.ac.uk
 | ||
|     engine: core
 | ||
|     categories: science
 | ||
|     shortcut: cor
 | ||
|     api_key: "..."
 | ||
|     timeout: 5
 | ||
| 
 | ||
| Implementations
 | ||
| ===============
 | ||
| 
 | ||
| """
 | ||
| # pylint: disable=too-many-branches
 | ||
| 
 | ||
| from datetime import datetime
 | ||
| from urllib.parse import urlencode
 | ||
| 
 | ||
| from searx.exceptions import SearxEngineAPIException
 | ||
| 
 | ||
| about = {
 | ||
|     "website": 'https://core.ac.uk',
 | ||
|     "wikidata_id": 'Q22661180',
 | ||
|     "official_api_documentation": 'https://api.core.ac.uk/docs/v3',
 | ||
|     "use_official_api": True,
 | ||
|     "require_api_key": True,
 | ||
|     "results": 'JSON',
 | ||
| }
 | ||
| 
 | ||
| api_key = 'unset'
 | ||
| """For an API key register at https://core.ac.uk/services/api and insert
 | ||
| the API key in the engine :ref:`core engine config`."""
 | ||
| 
 | ||
| categories = ['science', 'scientific publications']
 | ||
| paging = True
 | ||
| nb_per_page = 10
 | ||
| base_url = 'https://api.core.ac.uk/v3/search/works/'
 | ||
| 
 | ||
| 
 | ||
| def request(query, params):
 | ||
|     if api_key == 'unset':
 | ||
|         raise SearxEngineAPIException('missing CORE API key')
 | ||
| 
 | ||
|     # API v3 uses different parameters
 | ||
|     search_params = {
 | ||
|         'q': query,
 | ||
|         'offset': (params['pageno'] - 1) * nb_per_page,
 | ||
|         'limit': nb_per_page,
 | ||
|         'sort': 'relevance',
 | ||
|     }
 | ||
| 
 | ||
|     params['url'] = base_url + '?' + urlencode(search_params)
 | ||
|     params['headers'] = {'Authorization': f'Bearer {api_key}'}
 | ||
| 
 | ||
|     return params
 | ||
| 
 | ||
| 
 | ||
| def response(resp):
 | ||
|     results = []
 | ||
|     json_data = resp.json()
 | ||
| 
 | ||
|     for result in json_data.get('results', []):
 | ||
|         # Get title
 | ||
|         if not result.get('title'):
 | ||
|             continue
 | ||
| 
 | ||
|         # Get URL - try different options
 | ||
|         url = None
 | ||
| 
 | ||
|         # Try DOI first
 | ||
|         doi = result.get('doi')
 | ||
|         if doi:
 | ||
|             url = f'https://doi.org/{doi}'
 | ||
| 
 | ||
|         if url is None and result.get('doi'):
 | ||
|             # use the DOI reference
 | ||
|             url = 'https://doi.org/' + str(result['doi'])
 | ||
|         elif result.get('id'):
 | ||
|             url = 'https://core.ac.uk/works/' + str(result['id'])
 | ||
|         elif result.get('downloadUrl'):
 | ||
|             url = result['downloadUrl']
 | ||
|         elif result.get('sourceFulltextUrls'):
 | ||
|             url = result['sourceFulltextUrls']
 | ||
|         else:
 | ||
|             continue
 | ||
| 
 | ||
|         # Published date
 | ||
|         published_date = None
 | ||
| 
 | ||
|         raw_date = result.get('publishedDate') or result.get('depositedDate')
 | ||
|         if raw_date:
 | ||
|             try:
 | ||
|                 published_date = datetime.fromisoformat(result['publishedDate'].replace('Z', '+00:00'))
 | ||
|             except (ValueError, AttributeError):
 | ||
|                 pass
 | ||
| 
 | ||
|         # Handle journals
 | ||
|         journals = []
 | ||
|         if result.get('journals'):
 | ||
|             journals = [j.get('title') for j in result['journals'] if j.get('title')]
 | ||
| 
 | ||
|         # Handle publisher
 | ||
|         publisher = result.get('publisher', '').strip("'")
 | ||
|         if publisher:
 | ||
|             publisher = publisher.strip("'")
 | ||
| 
 | ||
|         # Handle authors
 | ||
|         authors = set()
 | ||
|         for i in result.get('authors', []):
 | ||
|             name = i.get("name")
 | ||
|             if name:
 | ||
|                 authors.add(name)
 | ||
| 
 | ||
|         results.append(
 | ||
|             {
 | ||
|                 'template': 'paper.html',
 | ||
|                 'title': result.get('title'),
 | ||
|                 'url': url,
 | ||
|                 'content': result.get('fullText', '') or '',
 | ||
|                 # 'comments': '',
 | ||
|                 'tags': result.get('fieldOfStudy', []),
 | ||
|                 'publishedDate': published_date,
 | ||
|                 'type': result.get('documentType', '') or '',
 | ||
|                 'authors': authors,
 | ||
|                 'editor': ', '.join(result.get('contributors', [])),
 | ||
|                 'publisher': publisher,
 | ||
|                 'journal': ', '.join(journals),
 | ||
|                 'doi': result.get('doi'),
 | ||
|                 # 'issn' : ''
 | ||
|                 # 'isbn' : ''
 | ||
|                 'pdf_url': result.get('downloadUrl', {}) or result.get("sourceFulltextUrls", {}),
 | ||
|             }
 | ||
|         )
 | ||
| 
 | ||
|     return results
 |