mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	[mod] implement searx.wikidata_units for unit converters
This commit is contained in:
		
							parent
							
								
									cf59ee2efc
								
							
						
					
					
						commit
						a800dd0473
					
				@ -15,7 +15,7 @@ import babel.numbers
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from flask_babel import gettext, get_locale
 | 
					from flask_babel import gettext, get_locale
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx import data
 | 
					from searx.units import symbol_to_si
 | 
				
			||||||
from searx.plugins import Plugin, PluginInfo
 | 
					from searx.plugins import Plugin, PluginInfo
 | 
				
			||||||
from searx.result_types import EngineResults
 | 
					from searx.result_types import EngineResults
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -86,132 +86,6 @@ RE_MEASURE = r'''
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ADDITIONAL_UNITS = [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        "si_name": "Q11579",
 | 
					 | 
				
			||||||
        "symbol": "°C",
 | 
					 | 
				
			||||||
        "to_si": lambda val: val + 273.15,
 | 
					 | 
				
			||||||
        "from_si": lambda val: val - 273.15,
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        "si_name": "Q11579",
 | 
					 | 
				
			||||||
        "symbol": "°F",
 | 
					 | 
				
			||||||
        "to_si": lambda val: (val + 459.67) * 5 / 9,
 | 
					 | 
				
			||||||
        "from_si": lambda val: (val * 9 / 5) - 459.67,
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
"""Additional items to convert from a measure unit to a SI unit (vice versa).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.. code:: python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
 | 
					 | 
				
			||||||
        "symbol": "°C",                      # symbol of the measure unit
 | 
					 | 
				
			||||||
        "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
 | 
					 | 
				
			||||||
        "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        "si_name": "Q11573",
 | 
					 | 
				
			||||||
        "symbol": "mi",
 | 
					 | 
				
			||||||
        "to_si": 1609.344,                   # convert measure value (val) to SI unit
 | 
					 | 
				
			||||||
        "from_si": 1 / 1609.344              # convert SI value (val) measure unit
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
 | 
					 | 
				
			||||||
or a callable_ (val in / converted value returned).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.. _callable: https://docs.python.org/3/glossary.html#term-callable
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ALIAS_SYMBOLS = {
 | 
					 | 
				
			||||||
    '°C': ('C',),
 | 
					 | 
				
			||||||
    '°F': ('F',),
 | 
					 | 
				
			||||||
    'mi': ('L',),
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
"""Alias symbols for known unit of measure symbols / by example::
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
 | 
					 | 
				
			||||||
    '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
 | 
					 | 
				
			||||||
    'mi': ('L',),      # list of alias symbols for mi (Q253276)
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SYMBOL_TO_SI = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def symbol_to_si():
 | 
					 | 
				
			||||||
    """Generates a list of tuples, each tuple is a measure unit and the fields
 | 
					 | 
				
			||||||
    in the tuple are:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
 | 
					 | 
				
			||||||
       multiplied by 1609.344)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
 | 
					 | 
				
			||||||
       100mi divided by 1609.344)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    The returned list is sorted, the first items are created from
 | 
					 | 
				
			||||||
    ``WIKIDATA_UNITS``, the second group of items is build from
 | 
					 | 
				
			||||||
    :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    If you search this list for a symbol, then a match with a symbol from
 | 
					 | 
				
			||||||
    Wikidata has the highest weighting (first hit in the list), followed by the
 | 
					 | 
				
			||||||
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
 | 
					 | 
				
			||||||
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    global SYMBOL_TO_SI  # pylint: disable=global-statement
 | 
					 | 
				
			||||||
    if SYMBOL_TO_SI:
 | 
					 | 
				
			||||||
        return SYMBOL_TO_SI
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # filter out units which can't be normalized to a SI unit and filter out
 | 
					 | 
				
			||||||
    # units without a symbol / arcsecond does not have a symbol
 | 
					 | 
				
			||||||
    # https://www.wikidata.org/wiki/Q829073
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for item in data.WIKIDATA_UNITS.values():
 | 
					 | 
				
			||||||
        if item['to_si_factor'] and item['symbol']:
 | 
					 | 
				
			||||||
            SYMBOL_TO_SI.append(
 | 
					 | 
				
			||||||
                (
 | 
					 | 
				
			||||||
                    item['symbol'],
 | 
					 | 
				
			||||||
                    item['si_name'],
 | 
					 | 
				
			||||||
                    1 / item['to_si_factor'],  # from_si
 | 
					 | 
				
			||||||
                    item['to_si_factor'],  # to_si
 | 
					 | 
				
			||||||
                    item['symbol'],
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for item in ADDITIONAL_UNITS:
 | 
					 | 
				
			||||||
        SYMBOL_TO_SI.append(
 | 
					 | 
				
			||||||
            (
 | 
					 | 
				
			||||||
                item['symbol'],
 | 
					 | 
				
			||||||
                item['si_name'],
 | 
					 | 
				
			||||||
                item['from_si'],
 | 
					 | 
				
			||||||
                item['to_si'],
 | 
					 | 
				
			||||||
                item['symbol'],
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    alias_items = []
 | 
					 | 
				
			||||||
    for item in SYMBOL_TO_SI:
 | 
					 | 
				
			||||||
        for alias in ALIAS_SYMBOLS.get(item[0], ()):
 | 
					 | 
				
			||||||
            alias_items.append(
 | 
					 | 
				
			||||||
                (
 | 
					 | 
				
			||||||
                    alias,
 | 
					 | 
				
			||||||
                    item[1],
 | 
					 | 
				
			||||||
                    item[2],  # from_si
 | 
					 | 
				
			||||||
                    item[3],  # to_si
 | 
					 | 
				
			||||||
                    item[0],  # origin unit
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
    SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
 | 
					 | 
				
			||||||
    return SYMBOL_TO_SI
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def _parse_text_and_convert(from_query, to_query) -> str | None:
 | 
					def _parse_text_and_convert(from_query, to_query) -> str | None:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # pylint: disable=too-many-branches, too-many-locals
 | 
					    # pylint: disable=too-many-branches, too-many-locals
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										231
									
								
								searx/wikidata_units.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										231
									
								
								searx/wikidata_units.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,231 @@
 | 
				
			|||||||
 | 
					# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
 | 
				
			||||||
 | 
					Coordinates`_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
 | 
				
			||||||
 | 
					   https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import collections
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx import data
 | 
				
			||||||
 | 
					from searx.engines import wikidata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ADDITIONAL_UNITS = [
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        "si_name": "Q11579",
 | 
				
			||||||
 | 
					        "symbol": "°C",
 | 
				
			||||||
 | 
					        "to_si": lambda val: val + 273.15,
 | 
				
			||||||
 | 
					        "from_si": lambda val: val - 273.15,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        "si_name": "Q11579",
 | 
				
			||||||
 | 
					        "symbol": "°F",
 | 
				
			||||||
 | 
					        "to_si": lambda val: (val + 459.67) * 5 / 9,
 | 
				
			||||||
 | 
					        "from_si": lambda val: (val * 9 / 5) - 459.67,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					"""Additional items to convert from a measure unit to a SI unit (vice versa).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. code:: python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
 | 
				
			||||||
 | 
					        "symbol": "°C",                      # symbol of the measure unit
 | 
				
			||||||
 | 
					        "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
 | 
				
			||||||
 | 
					        "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        "si_name": "Q11573",
 | 
				
			||||||
 | 
					        "symbol": "mi",
 | 
				
			||||||
 | 
					        "to_si": 1609.344,                   # convert measure value (val) to SI unit
 | 
				
			||||||
 | 
					        "from_si": 1 / 1609.344              # convert SI value (val) measure unit
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
 | 
				
			||||||
 | 
					or a callable_ (val in / converted value returned).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. _callable: https://docs.python.org/3/glossary.html#term-callable
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ALIAS_SYMBOLS = {
 | 
				
			||||||
 | 
					    '°C': ('C',),
 | 
				
			||||||
 | 
					    '°F': ('F',),
 | 
				
			||||||
 | 
					    'mi': ('L',),
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					"""Alias symbols for known unit of measure symbols / by example::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
 | 
				
			||||||
 | 
					    '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
 | 
				
			||||||
 | 
					    'mi': ('L',),      # list of alias symbols for mi (Q253276)
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SYMBOL_TO_SI = []
 | 
				
			||||||
 | 
					UNITS_BY_SI_NAME: dict | None = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
 | 
				
			||||||
 | 
					    from_si = units_by_si_name(si_name)[symbol][symbol]["from_si"]
 | 
				
			||||||
 | 
					    if isinstance(from_si, (float, int)):
 | 
				
			||||||
 | 
					        value = float(value) * from_si
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        value = from_si(float(value))
 | 
				
			||||||
 | 
					    return value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
 | 
				
			||||||
 | 
					    to_si = units_by_si_name(si_name)[symbol][symbol]["to_si"]
 | 
				
			||||||
 | 
					    if isinstance(to_si, (float, int)):
 | 
				
			||||||
 | 
					        value = float(value) * to_si
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        value = to_si(float(value))
 | 
				
			||||||
 | 
					    return value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def units_by_si_name(si_name):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    global UNITS_BY_SI_NAME
 | 
				
			||||||
 | 
					    if UNITS_BY_SI_NAME is not None:
 | 
				
			||||||
 | 
					        return UNITS_BY_SI_NAME[si_name]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    UNITS_BY_SI_NAME = {}
 | 
				
			||||||
 | 
					    for item in symbol_to_si():
 | 
				
			||||||
 | 
					        by_symbol = UNITS_BY_SI_NAME.get(si_name)
 | 
				
			||||||
 | 
					        if by_symbol is None:
 | 
				
			||||||
 | 
					            by_symbol = {}
 | 
				
			||||||
 | 
					            UNITS_BY_SI_NAME[si_name] = by_symbol
 | 
				
			||||||
 | 
					        by_symbol[item["symbol"]] = item
 | 
				
			||||||
 | 
					    return UNITS_BY_SI_NAME[si_name]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def symbol_to_si():
 | 
				
			||||||
 | 
					    """Generates a list of tuples, each tuple is a measure unit and the fields
 | 
				
			||||||
 | 
					    in the tuple are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
 | 
				
			||||||
 | 
					       multiplied by 1609.344)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
 | 
				
			||||||
 | 
					       100mi divided by 1609.344)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    The returned list is sorted, the first items are created from
 | 
				
			||||||
 | 
					    ``WIKIDATA_UNITS``, the second group of items is build from
 | 
				
			||||||
 | 
					    :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    If you search this list for a symbol, then a match with a symbol from
 | 
				
			||||||
 | 
					    Wikidata has the highest weighting (first hit in the list), followed by the
 | 
				
			||||||
 | 
					    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
 | 
				
			||||||
 | 
					    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    global SYMBOL_TO_SI  # pylint: disable=global-statement
 | 
				
			||||||
 | 
					    if SYMBOL_TO_SI:
 | 
				
			||||||
 | 
					        return SYMBOL_TO_SI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # filter out units which can't be normalized to a SI unit and filter out
 | 
				
			||||||
 | 
					    # units without a symbol / arcsecond does not have a symbol
 | 
				
			||||||
 | 
					    # https://www.wikidata.org/wiki/Q829073
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for item in data.WIKIDATA_UNITS.values():
 | 
				
			||||||
 | 
					        if item['to_si_factor'] and item['symbol']:
 | 
				
			||||||
 | 
					            SYMBOL_TO_SI.append(
 | 
				
			||||||
 | 
					                (
 | 
				
			||||||
 | 
					                    item['symbol'],
 | 
				
			||||||
 | 
					                    item['si_name'],
 | 
				
			||||||
 | 
					                    1 / item['to_si_factor'],  # from_si
 | 
				
			||||||
 | 
					                    item['to_si_factor'],  # to_si
 | 
				
			||||||
 | 
					                    item['symbol'],
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for item in ADDITIONAL_UNITS:
 | 
				
			||||||
 | 
					        SYMBOL_TO_SI.append(
 | 
				
			||||||
 | 
					            (
 | 
				
			||||||
 | 
					                item['symbol'],
 | 
				
			||||||
 | 
					                item['si_name'],
 | 
				
			||||||
 | 
					                item['from_si'],
 | 
				
			||||||
 | 
					                item['to_si'],
 | 
				
			||||||
 | 
					                item['symbol'],
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    alias_items = []
 | 
				
			||||||
 | 
					    for item in SYMBOL_TO_SI:
 | 
				
			||||||
 | 
					        for alias in ALIAS_SYMBOLS.get(item[0], ()):
 | 
				
			||||||
 | 
					            alias_items.append(
 | 
				
			||||||
 | 
					                (
 | 
				
			||||||
 | 
					                    alias,
 | 
				
			||||||
 | 
					                    item[1],
 | 
				
			||||||
 | 
					                    item[2],  # from_si
 | 
				
			||||||
 | 
					                    item[3],  # to_si
 | 
				
			||||||
 | 
					                    item[0],  # origin unit
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					    SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
 | 
				
			||||||
 | 
					    return SYMBOL_TO_SI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# the response contains duplicate ?item with the different ?symbol
 | 
				
			||||||
 | 
					# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
 | 
				
			||||||
 | 
					# even if a ?item has different ?symbol of the same rank.
 | 
				
			||||||
 | 
					# A deterministic result
 | 
				
			||||||
 | 
					# see:
 | 
				
			||||||
 | 
					# * https://www.wikidata.org/wiki/Help:Ranking
 | 
				
			||||||
 | 
					# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
 | 
				
			||||||
 | 
					# * https://w.wiki/32BT
 | 
				
			||||||
 | 
					# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
 | 
				
			||||||
 | 
					#   see the result for https://www.wikidata.org/wiki/Q11582
 | 
				
			||||||
 | 
					#   there are multiple symbols with the same rank
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SARQL_REQUEST = """
 | 
				
			||||||
 | 
					SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
 | 
				
			||||||
 | 
					WHERE
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  ?item wdt:P31/wdt:P279 wd:Q47574 .
 | 
				
			||||||
 | 
					  ?item p:P5061 ?symbolP .
 | 
				
			||||||
 | 
					  ?symbolP ps:P5061 ?symbol ;
 | 
				
			||||||
 | 
					           wikibase:rank ?rank .
 | 
				
			||||||
 | 
					  OPTIONAL {
 | 
				
			||||||
 | 
					    ?item p:P2370 ?tosistmt .
 | 
				
			||||||
 | 
					    ?tosistmt psv:P2370 ?tosinode .
 | 
				
			||||||
 | 
					    ?tosinode wikibase:quantityAmount ?tosi .
 | 
				
			||||||
 | 
					    ?tosinode wikibase:quantityUnit ?tosiUnit .
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  FILTER(LANG(?symbol) = "en").
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					ORDER BY ?item DESC(?rank) ?symbol
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fetch_units():
					    """Query Wikidata for the units of measurement and return them as an
					    ordered mapping from the Wikidata item ID to a dict with the keys
					    ``symbol``, ``si_name`` and ``to_si_factor``.

					    The result is what gets persisted as :py:obj:`searx.data.WIKIDATA_UNITS`.
					    If the response lists an item more than once, only its first row is used.
					    """
					    units = collections.OrderedDict()
					    answer = wikidata.send_wikidata_query(SARQL_REQUEST)

					    for binding in answer['results']['bindings']:
					        unit_symbol = binding['symbol']['value']
					        # the item ID is the last path segment of the entity URI
					        item_id = binding['item']['value'].rsplit('/', 1)[1]

					        si_uri = binding.get('tosiUnit', {}).get('value', '')
					        si_id = si_uri.rsplit('/', 1)[1] if si_uri else si_uri
					        factor = binding.get('tosi', {}).get('value', '')

					        if item_id in units:
					            # duplicate row of an item already seen: keep the first one
					            continue

					        units[item_id] = {
					            'symbol': unit_symbol,
					            'si_name': si_id or None,
					            'to_si_factor': float(factor) if factor else None,
					        }

					    return units
 | 
				
			||||||
@ -8,76 +8,15 @@ Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
 | 
				
			|||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import collections
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# set path
 | 
					 | 
				
			||||||
from os.path import join
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from searx import searx_dir
 | 
					 | 
				
			||||||
from searx.engines import wikidata, set_loggers
 | 
					from searx.engines import wikidata, set_loggers
 | 
				
			||||||
from searx.data import data_dir
 | 
					from searx.data import data_dir
 | 
				
			||||||
 | 
					from searx.wikidata_units import fetch_units
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DATA_FILE = data_dir / 'wikidata_units.json'
 | 
					DATA_FILE = data_dir / 'wikidata_units.json'
 | 
				
			||||||
 | 
					 | 
				
			||||||
set_loggers(wikidata, 'wikidata')
 | 
					set_loggers(wikidata, 'wikidata')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# the response contains duplicate ?item with the different ?symbol
 | 
					 | 
				
			||||||
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
 | 
					 | 
				
			||||||
# even if a ?item has different ?symbol of the same rank.
 | 
					 | 
				
			||||||
# A deterministic result
 | 
					 | 
				
			||||||
# see:
 | 
					 | 
				
			||||||
# * https://www.wikidata.org/wiki/Help:Ranking
 | 
					 | 
				
			||||||
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
 | 
					 | 
				
			||||||
# * https://w.wiki/32BT
 | 
					 | 
				
			||||||
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
 | 
					 | 
				
			||||||
#   see the result for https://www.wikidata.org/wiki/Q11582
 | 
					 | 
				
			||||||
#   there are multiple symbols the same rank
 | 
					 | 
				
			||||||
SARQL_REQUEST = """
 | 
					 | 
				
			||||||
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
 | 
					 | 
				
			||||||
WHERE
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  ?item wdt:P31/wdt:P279 wd:Q47574 .
 | 
					 | 
				
			||||||
  ?item p:P5061 ?symbolP .
 | 
					 | 
				
			||||||
  ?symbolP ps:P5061 ?symbol ;
 | 
					 | 
				
			||||||
           wikibase:rank ?rank .
 | 
					 | 
				
			||||||
  OPTIONAL {
 | 
					 | 
				
			||||||
    ?item p:P2370 ?tosistmt .
 | 
					 | 
				
			||||||
    ?tosistmt psv:P2370 ?tosinode .
 | 
					 | 
				
			||||||
    ?tosinode wikibase:quantityAmount ?tosi .
 | 
					 | 
				
			||||||
    ?tosinode wikibase:quantityUnit ?tosiUnit .
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  FILTER(LANG(?symbol) = "en").
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
ORDER BY ?item DESC(?rank) ?symbol
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_data():
 | 
					 | 
				
			||||||
    results = collections.OrderedDict()
 | 
					 | 
				
			||||||
    response = wikidata.send_wikidata_query(SARQL_REQUEST)
 | 
					 | 
				
			||||||
    for unit in response['results']['bindings']:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        symbol = unit['symbol']['value']
 | 
					 | 
				
			||||||
        name = unit['item']['value'].rsplit('/', 1)[1]
 | 
					 | 
				
			||||||
        si_name = unit.get('tosiUnit', {}).get('value', '')
 | 
					 | 
				
			||||||
        if si_name:
 | 
					 | 
				
			||||||
            si_name = si_name.rsplit('/', 1)[1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        to_si_factor = unit.get('tosi', {}).get('value', '')
 | 
					 | 
				
			||||||
        if name not in results:
 | 
					 | 
				
			||||||
            # ignore duplicate: always use the first one
 | 
					 | 
				
			||||||
            results[name] = {
 | 
					 | 
				
			||||||
                'symbol': symbol,
 | 
					 | 
				
			||||||
                'si_name': si_name if si_name else None,
 | 
					 | 
				
			||||||
                'to_si_factor': float(to_si_factor) if to_si_factor else None,
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
    return results
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_wikidata_units_filename():
 | 
					 | 
				
			||||||
    return join(join(searx_dir, "data"), "")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    with DATA_FILE.open('w', encoding="utf8") as f:
 | 
					    with DATA_FILE.open('w', encoding="utf8") as f:
 | 
				
			||||||
        json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)
 | 
					        json.dump(fetch_units(), f, indent=4, sort_keys=True, ensure_ascii=False)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user