feat: diacritic-insensitive search (#2132)

* add normalized columns and use them for search

* add migration to fill all normalized columns
This commit is contained in:
Sören 2023-02-20 01:40:18 +01:00 committed by GitHub
parent 670907b563
commit 6a5f9d7f6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 201 additions and 11 deletions

View File

@ -0,0 +1,118 @@
"""add normalized search properties
Revision ID: 5ab195a474eb
Revises: 16160bf731a0
Create Date: 2023-02-14 20:45:41.102571
"""
import sqlalchemy as sa
from sqlalchemy import orm, select
from sqlalchemy.orm import Mapped, mapped_column, DeclarativeBase
from text_unidecode import unidecode
import mealie.db.migration_types
from alembic import op
from mealie.db.models._model_utils import GUID
# revision identifiers, used by Alembic.
# `revision` is this migration's id; `down_revision` is the revision it
# revises (see the "Revises:" line of the module docstring).
revision = "5ab195a474eb"
down_revision = "16160bf731a0"
branch_labels = None
depends_on = None
class SqlAlchemyBase(DeclarativeBase):
    """Declarative base for the table mappings declared in this migration."""
# Intermediate table definitions
class RecipeModel(SqlAlchemyBase):
    """Migration-local mapping of the ``recipes`` table.

    Declares only the primary key, the two text columns that
    ``do_data_migration`` reads, and the two ``*_normalized`` columns it
    fills in.
    """

    __tablename__ = "recipes"
    id: Mapped[GUID] = mapped_column(GUID, primary_key=True, default=GUID.generate)
    # Source text columns read by the data migration.
    name: Mapped[str] = mapped_column(sa.String, nullable=False)
    description: Mapped[str | None] = mapped_column(sa.String)
    # Normalized copies written by the data migration.
    name_normalized: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)
    description_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
class RecipeIngredient(SqlAlchemyBase):
    """Migration-local mapping of the ``recipes_ingredients`` table.

    Declares only the primary key, the two text columns that
    ``do_data_migration`` reads, and the two ``*_normalized`` columns it
    fills in.
    """

    __tablename__ = "recipes_ingredients"
    id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
    # Source text columns read by the data migration.
    note: Mapped[str | None] = mapped_column(sa.String)
    original_text: Mapped[str | None] = mapped_column(sa.String)
    # Normalized copies written by the data migration.
    note_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
    original_text_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
def do_data_migration():
    """Backfill every ``*_normalized`` column from its source text column.

    All recipes and all recipe ingredients are loaded through a session bound
    to the migration connection. Each non-null source value is passed through
    ``unidecode``, lower-cased and stripped, and stored in the matching
    normalized column. Null source values leave the normalized column as it
    is. Everything is committed in a single ``commit`` at the end.
    """

    def _normalize(text):
        # Same transformation for every column: unidecode -> lower -> strip.
        return unidecode(text).lower().strip()

    session = orm.Session(bind=op.get_bind())

    # Both result sets are fetched up front, before any row is modified.
    all_recipes = session.execute(select(RecipeModel)).scalars().all()
    all_ingredients = session.execute(select(RecipeIngredient)).scalars().all()

    for row in all_recipes:
        if row.name is not None:
            row.name_normalized = _normalize(row.name)
        if row.description is not None:
            row.description_normalized = _normalize(row.description)
        session.add(row)

    for row in all_ingredients:
        if row.note is not None:
            row.note_normalized = _normalize(row.note)
        if row.original_text is not None:
            row.original_text_normalized = _normalize(row.original_text)
        session.add(row)

    session.commit()
def upgrade():
    """Add, index and backfill the normalized search columns.

    For both ``recipes`` and ``recipes_ingredients`` this adds the
    ``*_normalized`` columns as nullable, drops the indexes on the original
    text columns and creates indexes on the normalized columns instead. It
    then runs ``do_data_migration`` to fill the new columns and finally makes
    ``recipes.name_normalized`` non-nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Set column to nullable first, since we do not have values here yet
    op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=True))
    op.add_column("recipes", sa.Column("description_normalized", sa.String(), nullable=True))
    # The indexes move from the raw text columns to the normalized ones.
    op.drop_index("ix_recipes_description", table_name="recipes")
    op.drop_index("ix_recipes_name", table_name="recipes")
    op.create_index(op.f("ix_recipes_description_normalized"), "recipes", ["description_normalized"], unique=False)
    op.create_index(op.f("ix_recipes_name_normalized"), "recipes", ["name_normalized"], unique=False)
    # Same sequence for the ingredients table: add columns, swap indexes.
    op.add_column("recipes_ingredients", sa.Column("note_normalized", sa.String(), nullable=True))
    op.add_column("recipes_ingredients", sa.Column("original_text_normalized", sa.String(), nullable=True))
    op.drop_index("ix_recipes_ingredients_note", table_name="recipes_ingredients")
    op.drop_index("ix_recipes_ingredients_original_text", table_name="recipes_ingredients")
    op.create_index(
        op.f("ix_recipes_ingredients_note_normalized"), "recipes_ingredients", ["note_normalized"], unique=False
    )
    op.create_index(
        op.f("ix_recipes_ingredients_original_text_normalized"),
        "recipes_ingredients",
        ["original_text_normalized"],
        unique=False,
    )
    # Fill the new columns for all existing rows before tightening constraints.
    do_data_migration()
    # Make recipes.name_normalized not nullable now that column should be filled for all rows
    with op.batch_alter_table("recipes", schema=None) as batch_op:
        batch_op.alter_column("name_normalized", nullable=False, existing_type=sa.String())
    # ### end Alembic commands ###
def downgrade():
    """Remove the normalized search columns and restore the original indexes.

    For ``recipes_ingredients`` and then ``recipes``: drop the indexes on the
    ``*_normalized`` columns, recreate the indexes on the original text
    columns, and drop the ``*_normalized`` columns.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # recipes_ingredients: swap the indexes back, then drop the added columns.
    op.drop_index(op.f("ix_recipes_ingredients_original_text_normalized"), table_name="recipes_ingredients")
    op.drop_index(op.f("ix_recipes_ingredients_note_normalized"), table_name="recipes_ingredients")
    op.create_index("ix_recipes_ingredients_original_text", "recipes_ingredients", ["original_text"], unique=False)
    op.create_index("ix_recipes_ingredients_note", "recipes_ingredients", ["note"], unique=False)
    op.drop_column("recipes_ingredients", "original_text_normalized")
    op.drop_column("recipes_ingredients", "note_normalized")
    # recipes: swap the indexes back, then drop the added columns.
    op.drop_index(op.f("ix_recipes_name_normalized"), table_name="recipes")
    op.drop_index(op.f("ix_recipes_description_normalized"), table_name="recipes")
    op.create_index("ix_recipes_name", "recipes", ["name"], unique=False)
    op.create_index("ix_recipes_description", "recipes", ["description"], unique=False)
    op.drop_column("recipes", "description_normalized")
    op.drop_column("recipes", "name_normalized")
    # ### end Alembic commands ###

View File

@ -1,7 +1,8 @@
from typing import TYPE_CHECKING
from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, orm
from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm
from sqlalchemy.orm import Mapped, mapped_column
from text_unidecode import unidecode
from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase
from mealie.db.models.labels import MultiPurposeLabel
@ -63,7 +64,7 @@ class RecipeIngredient(SqlAlchemyBase, BaseMixins):
recipe_id: Mapped[GUID | None] = mapped_column(GUID, ForeignKey("recipes.id"))
title: Mapped[str | None] = mapped_column(String) # Section Header - Shows if Present
note: Mapped[str | None] = mapped_column(String, index=True) # Force Show Text - Overrides Concat
note: Mapped[str | None] = mapped_column(String) # Force Show Text - Overrides Concat
# Scaling Items
unit_id: Mapped[GUID | None] = mapped_column(GUID, ForeignKey("ingredient_units.id"), index=True)
@ -73,10 +74,35 @@ class RecipeIngredient(SqlAlchemyBase, BaseMixins):
food: Mapped[IngredientFoodModel | None] = orm.relationship(IngredientFoodModel, uselist=False)
quantity: Mapped[float | None] = mapped_column(Float)
original_text: Mapped[str | None] = mapped_column(String, index=True)
original_text: Mapped[str | None] = mapped_column(String)
reference_id: Mapped[GUID | None] = mapped_column(GUID) # Reference Links
# Automatically updated by sqlalchemy event, do not write to this manually
note_normalized: Mapped[str | None] = mapped_column(String, index=True)
original_text_normalized: Mapped[str | None] = mapped_column(String, index=True)
@auto_init()
def __init__(self, **_) -> None:
pass
def __init__(
    self,
    note: str | None = None,
    orginal_text: str | None = None,
    original_text: str | None = None,
    **_,
) -> None:
    """Fill the normalized search columns from the constructor arguments.

    Args:
        note: Ingredient note; when given, its normalized form is stored in
            ``note_normalized``.
        orginal_text: Misspelled legacy name for ``original_text``, kept so
            existing callers that pass it keep working. Ignored when
            ``original_text`` is also given.
        original_text: Original ingredient text; when given, its normalized
            form is stored in ``original_text_normalized``.
        **_: Any other keyword arguments are accepted and ignored here.
    """
    # SQLAlchemy events do not seem to register things that are set during auto_init
    if note is not None:
        self.note_normalized = unidecode(note).lower().strip()

    # Previously only the misspelled keyword was accepted, so a caller passing
    # ``original_text=...`` never reached this branch, and the result was
    # written to a stray ``orginal_text`` attribute instead of the column.
    if original_text is None:
        original_text = orginal_text
    if original_text is not None:
        self.original_text_normalized = unidecode(original_text).lower().strip()
@event.listens_for(RecipeIngredient.note, "set")
def receive_note(target: RecipeIngredient, value: str | None, oldvalue, initiator):
    """Keep ``note_normalized`` in sync whenever ``note`` is assigned.

    Args:
        target: The ingredient whose ``note`` attribute is being set.
        value: The new note, or ``None`` to clear it.
        oldvalue: Previous attribute value (unused).
        initiator: SQLAlchemy event token (unused).
    """
    # The search column for notes is ``note_normalized``. The listener used to
    # write ``name_normalized``, which is not a RecipeIngredient column, so the
    # searchable value went stale after any later assignment to ``note``.
    if value is not None:
        target.note_normalized = unidecode(value).lower().strip()
    else:
        target.note_normalized = None
@event.listens_for(RecipeIngredient.original_text, "set")
def receive_original_text(target: RecipeIngredient, value: str, oldvalue, initiator):
    """Mirror every assignment to ``original_text`` into ``original_text_normalized``.

    A ``None`` value clears the normalized column. Any other value is passed
    through ``unidecode``, lower-cased and stripped before being stored.
    """
    target.original_text_normalized = None if value is None else unidecode(value).lower().strip()

View File

@ -3,8 +3,10 @@ from typing import TYPE_CHECKING
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy import event
from sqlalchemy.ext.orderinglist import ordering_list
from sqlalchemy.orm import Mapped, mapped_column, validates
from text_unidecode import unidecode
from mealie.db.models._model_utils.guid import GUID
@ -54,8 +56,9 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
)
# General Recipe Properties
name: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)
description: Mapped[str | None] = mapped_column(sa.String, index=True)
name: Mapped[str] = mapped_column(sa.String, nullable=False)
description: Mapped[str | None] = mapped_column(sa.String)
image: Mapped[str | None] = mapped_column(sa.String)
# Time Related Properties
@ -127,6 +130,10 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
cascade="all, delete-orphan",
)
# Automatically updated by sqlalchemy event, do not write to this manually
name_normalized: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)
description_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
class Config:
get_attr = "slug"
exclude = {
@ -150,6 +157,8 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
def __init__(
self,
session,
name: str | None = None,
description: str | None = None,
assets: list | None = None,
notes: list[dict] | None = None,
nutrition: dict | None = None,
@ -175,3 +184,23 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
self.notes = [Note(**n) for n in notes]
self.date_updated = datetime.now()
# SQLAlchemy events do not seem to register things that are set during auto_init
if name is not None:
self.name_normalized = unidecode(name).lower().strip()
if description is not None:
self.description_normalized = unidecode(description).lower().strip()
@event.listens_for(RecipeModel.name, "set")
def receive_name(target: RecipeModel, value: str, oldvalue, initiator):
    """Mirror every assignment to ``name`` into ``name_normalized``.

    The value is passed through ``unidecode``, lower-cased and stripped.
    There is no ``None`` branch here: ``value`` is used as a string directly.
    """
    normalized_name = unidecode(value).lower().strip()
    target.name_normalized = normalized_name
@event.listens_for(RecipeModel.description, "set")
def receive_description(target: RecipeModel, value: str, oldvalue, initiator):
    """Mirror every assignment to ``description`` into ``description_normalized``.

    A ``None`` value clears the normalized column. Any other value is passed
    through ``unidecode``, lower-cased and stripped before being stored.
    """
    target.description_normalized = None if value is None else unidecode(value).lower().strip()

View File

@ -7,6 +7,7 @@ from slugify import slugify
from sqlalchemy import Select, and_, desc, func, or_, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import joinedload
from text_unidecode import unidecode
from mealie.db.models.recipe.category import Category
from mealie.db.models.recipe.ingredient import RecipeIngredient
@ -150,12 +151,16 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
return ids + additional_ids
def _add_search_to_query(self, query: Select, search: str) -> Select:
normalized_search = unidecode(search).lower().strip()
# I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out
# that at least SQLite won't use indexes for that correctly anymore and takes a big hit, so prefiltering it is
ingredient_ids = (
self.session.execute(
select(RecipeIngredient.id).filter(
or_(RecipeIngredient.note.ilike(f"%{search}%"), RecipeIngredient.original_text.ilike(f"%{search}%"))
or_(
RecipeIngredient.note_normalized.like(f"%{normalized_search}%"),
RecipeIngredient.original_text_normalized.like(f"%{normalized_search}%"),
)
)
)
.scalars()
@ -164,11 +169,11 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
q = query.filter(
or_(
RecipeModel.name.ilike(f"%{search}%"),
RecipeModel.description.ilike(f"%{search}%"),
RecipeModel.name_normalized.like(f"%{normalized_search}%"),
RecipeModel.description_normalized.like(f"%{normalized_search}%"),
RecipeModel.recipe_ingredient.any(RecipeIngredient.id.in_(ingredient_ids)),
)
).order_by(desc(RecipeModel.name.ilike(f"%{search}%")))
).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%")))
return q
def page_all(

View File

@ -463,6 +463,12 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
group_id=unique_user.group_id,
name=name_3,
),
# Test diacritics
Recipe(
user_id=unique_user.user_id,
group_id=unique_user.group_id,
name="Rátàtôuile",
),
]
for recipe in recipes:
@ -494,3 +500,9 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
assert len(ordered_result) == 2
assert ordered_result[0].name == name_3
assert ordered_result[1].name == name_1
# Test string normalization
normalized_result = database.recipes.page_all(pagination_query, search="ratat").items
print([r.name for r in normalized_result])
assert len(normalized_result) == 1
assert normalized_result[0].name == "Rátàtôuile"