272 lines
24 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<!-- Exactly one viewport declaration: the theme previously emitted it twice. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>searx.engines.core &#8212; SearXNG Documentation (2025.11.7+b9b46431b)</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=6625fa76" />
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
<script src="../../../_static/documentation_options.js?v=abc48383"></script>
<script src="../../../_static/doctools.js?v=9bcbadda"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script data-project="searxng" data-version="2025.11.7+b9b46431b" src="../../../_static/describe_version.js?v=fa7f30d0"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head><body>
<!-- Breadcrumb / related-links bar. The last item is the current page and is
     marked with aria-current so assistive technology announces it as such. -->
<div class="related" role="navigation" aria-label="Related">
<h3>Navigation</h3>
<ul>
<li class="right" style="margin-right: 10px">
<a href="../../../genindex.html" title="General Index"
accesskey="I">index</a></li>
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.11.7+b9b46431b)</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> &#187;</li>
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> &#187;</li>
<li class="nav-item nav-item-this"><a href="" aria-current="page">searx.engines.core</a></li>
</ul>
</div>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<h1>Source code for searx.engines.core</h1><div class="highlight"><pre>
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
<span class="sd">&quot;&quot;&quot;CORE_ (COnnecting REpositories) provides a comprehensive bibliographic</span>
<span class="sd">database of the world&#39;s scholarly literature, collecting and indexing</span>
<span class="sd">research from repositories and journals.</span>
<span class="sd">.. _CORE: https://core.ac.uk/about</span>
<span class="sd">.. note::</span>
<span class="sd"> The CORE engine requires an :py:obj:`API key &lt;api_key&gt;`.</span>
<span class="sd">.. _core engine config:</span>
<span class="sd">Configuration</span>
<span class="sd">=============</span>
<span class="sd">The engine has the following additional settings:</span>
<span class="sd">- :py:obj:`api_key`</span>
<span class="sd">.. code:: yaml</span>
<span class="sd"> - name: core.ac.uk</span>
<span class="sd"> api_key: &quot;...&quot;</span>
<span class="sd"> inactive: false</span>
<span class="sd">Implementations</span>
<span class="sd">===============</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">t</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">urllib.parse</span><span class="w"> </span><span class="kn">import</span> <span class="n">urlencode</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.result_types</span><span class="w"> </span><span class="kn">import</span> <span class="n">EngineResults</span>
<span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.extended_types</span><span class="w"> </span><span class="kn">import</span> <span class="n">SXNG_Response</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.search.processors</span><span class="w"> </span><span class="kn">import</span> <span class="n">OnlineParams</span>
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;website&quot;</span><span class="p">:</span> <span class="s2">&quot;https://core.ac.uk&quot;</span><span class="p">,</span>
<span class="s2">&quot;wikidata_id&quot;</span><span class="p">:</span> <span class="s2">&quot;Q22661180&quot;</span><span class="p">,</span>
<span class="s2">&quot;official_api_documentation&quot;</span><span class="p">:</span> <span class="s2">&quot;https://api.core.ac.uk/docs/v3&quot;</span><span class="p">,</span>
<span class="s2">&quot;use_official_api&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
<span class="s2">&quot;require_api_key&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
<span class="s2">&quot;results&quot;</span><span class="p">:</span> <span class="s2">&quot;JSON&quot;</span><span class="p">,</span>
<span class="p">}</span>
<span class="n">api_key</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
<span class="sd">&quot;&quot;&quot;For an API key register at https://core.ac.uk/services/api and insert</span>
<span class="sd">the API key in the engine :ref:`core engine config`.&quot;&quot;&quot;</span>
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;science&quot;</span><span class="p">,</span> <span class="s2">&quot;scientific publications&quot;</span><span class="p">]</span>
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">nb_per_page</span> <span class="o">=</span> <span class="mi">10</span>
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">&quot;https://api.core.ac.uk/v3/search/works/&quot;</span>
<div class="viewcode-block" id="setup">
<a class="viewcode-back" href="../../../dev/engines/online/core.html#searx.engines.core.setup">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">setup</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialization of the CORE_ engine, checks whether the :py:obj:`api_key`</span>
<span class="sd"> is set, otherwise the engine is inactive.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">key</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;api_key&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;unset&quot;</span><span class="p">,</span> <span class="s2">&quot;unknown&quot;</span><span class="p">,</span> <span class="s2">&quot;...&quot;</span><span class="p">):</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">&quot;CORE&#39;s API key is not set or invalid.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="s2">&quot;OnlineParams&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># API v3 uses different parameters</span>
<span class="n">search_params</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;q&quot;</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
<span class="s2">&quot;offset&quot;</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">&quot;pageno&quot;</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">nb_per_page</span><span class="p">,</span>
<span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="n">nb_per_page</span><span class="p">,</span>
<span class="s2">&quot;sort&quot;</span><span class="p">:</span> <span class="s2">&quot;relevance&quot;</span><span class="p">,</span>
<span class="p">}</span>
<span class="n">params</span><span class="p">[</span><span class="s2">&quot;url&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s2">&quot;?&quot;</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">search_params</span><span class="p">)</span>
<span class="n">params</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;Authorization&quot;</span><span class="p">:</span> <span class="sa">f</span><span class="s2">&quot;Bearer </span><span class="si">{</span><span class="n">api_key</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">}</span>
<span class="k">def</span><span class="w"> </span><span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">:</span> <span class="s2">&quot;SXNG_Response&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">EngineResults</span><span class="p">:</span>
<span class="c1"># pylint: disable=too-many-branches</span>
<span class="n">res</span> <span class="o">=</span> <span class="n">EngineResults</span><span class="p">()</span>
<span class="n">json_data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;results&quot;</span><span class="p">,</span> <span class="p">[]):</span>
<span class="c1"># Get title</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;title&quot;</span><span class="p">):</span>
<span class="k">continue</span>
<span class="c1"># Pick the result URL by priority: DOI, CORE work page, download URL,</span>
<span class="c1"># source full-text URLs.  Results without any of these are skipped.</span>
<span class="n">url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Try DOI first</span>
<span class="n">doi</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;doi&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">doi</span><span class="p">:</span>
<span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;https://doi.org/</span><span class="si">{</span><span class="n">doi</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">elif</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;id&quot;</span><span class="p">):</span>
<span class="n">url</span> <span class="o">=</span> <span class="s2">&quot;https://core.ac.uk/works/&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s2">&quot;id&quot;</span><span class="p">])</span>
<span class="k">elif</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;downloadUrl&quot;</span><span class="p">):</span>
<span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s2">&quot;downloadUrl&quot;</span><span class="p">]</span>
<span class="k">elif</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;sourceFulltextUrls&quot;</span><span class="p">):</span>
<span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s2">&quot;sourceFulltextUrls&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">continue</span>
<span class="c1"># Published date, falling back to the deposited date when it is missing</span>
<span class="n">published_date</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">raw_date</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;publishedDate&quot;</span><span class="p">)</span> <span class="ow">or</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;depositedDate&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">raw_date</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">published_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromisoformat</span><span class="p">(</span><span class="n">raw_date</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;Z&quot;</span><span class="p">,</span> <span class="s2">&quot;+00:00&quot;</span><span class="p">))</span>
<span class="k">except</span> <span class="p">(</span><span class="ne">ValueError</span><span class="p">,</span> <span class="ne">AttributeError</span><span class="p">):</span>
<span class="k">pass</span>
<span class="c1"># Handle journals</span>
<span class="n">journals</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;journals&quot;</span><span class="p">):</span>
<span class="n">journals</span> <span class="o">=</span> <span class="p">[</span><span class="n">j</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;title&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="n">result</span><span class="p">[</span><span class="s2">&quot;journals&quot;</span><span class="p">]</span> <span class="k">if</span> <span class="n">j</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;title&quot;</span><span class="p">)]</span>
<span class="c1"># Handle publisher</span>
<span class="n">publisher</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;publisher&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">)</span>
<span class="c1"># Handle authors</span>
<span class="n">authors</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;authors&quot;</span><span class="p">,</span> <span class="p">[]):</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">i</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;name&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">name</span><span class="p">:</span>
<span class="n">authors</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">res</span><span class="o">.</span><span class="n">add</span><span class="p">(</span>
<span class="n">res</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">Paper</span><span class="p">(</span>
<span class="n">title</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;title&quot;</span><span class="p">),</span>
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
<span class="n">content</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;fullText&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span> <span class="ow">or</span> <span class="s2">&quot;&quot;</span><span class="p">,</span>
<span class="n">tags</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;fieldOfStudy&quot;</span><span class="p">,</span> <span class="p">[]),</span>
<span class="n">publishedDate</span><span class="o">=</span><span class="n">published_date</span><span class="p">,</span>
<span class="nb">type</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;documentType&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span> <span class="ow">or</span> <span class="s2">&quot;&quot;</span><span class="p">,</span>
<span class="n">authors</span><span class="o">=</span><span class="n">authors</span><span class="p">,</span>
<span class="n">editor</span><span class="o">=</span><span class="s2">&quot;, &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;contributors&quot;</span><span class="p">,</span> <span class="p">[])),</span>
<span class="n">publisher</span><span class="o">=</span><span class="n">publisher</span><span class="p">,</span>
<span class="n">journal</span><span class="o">=</span><span class="s2">&quot;, &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">journals</span><span class="p">),</span>
<span class="n">doi</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;doi&quot;</span><span class="p">),</span>
<span class="n">pdf_url</span><span class="o">=</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;downloadUrl&quot;</span><span class="p">,</span> <span class="p">{})</span> <span class="ow">or</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;sourceFulltextUrls&quot;</span><span class="p">,</span> <span class="p">{}),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">res</span>
</pre></div>
<div class="clearer"></div>
</div>
</div>
</div>
<span id="sidebar-top"></span>
<!-- Main sidebar: logo, table of contents, project links, page relations and
     quick search. All lists are explicitly closed and balanced. -->
<div class="sphinxsidebar" role="navigation" aria-label="Main">
<div class="sphinxsidebarwrapper">
<p class="logo"><a href="../../../index.html">
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/>
</a></p>
<h3><a href="../../../index.html">Table of Contents</a></h3>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
</ul>
<h3>Project Links</h3>
<ul>
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a></li>
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a></li>
<li><a href="https://searx.space">Public instances</a></li>
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a></li>
</ul>
<h3>Navigation</h3>
<ul>
<li><a href="../../../index.html">Overview</a>
<ul>
<li><a href="../../index.html">Module code</a>
<ul>
<li><a href="../engines.html">searx.engines</a></li>
</ul>
</li>
</ul>
</li>
</ul>
<!-- The search box stays hidden until the script below runs, so it is only shown when JavaScript is available. -->
<search id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="../../../search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</search>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<!-- Page footer: copyright notice. -->
<div class="footer" role="contentinfo">
&#169; Copyright SearXNG team.
</div>
</body>
</html>