feat: diacritic-insensitive search (#2132)

* add normalized columns and use them for search * add migration to fill all normalized columns
2025-07-09 03:04:54 -04:00 · 2023-02-20 01:40:18 +01:00 · 2023-02-20 01:40:18 +01:00 · 6a5f9d7f6b
commit 6a5f9d7f6b
parent 670907b563
5 changed files with 201 additions and 11 deletions
--- a/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py
+++ b/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py
@ -0,0 +1,118 @@
+"""add normalized search properties
+
+Revision ID: 5ab195a474eb
+Revises: 16160bf731a0
+Create Date: 2023-02-14 20:45:41.102571
+
+"""
+import sqlalchemy as sa
+from sqlalchemy import orm, select
+from sqlalchemy.orm import Mapped, mapped_column, DeclarativeBase
+from text_unidecode import unidecode
+
+import mealie.db.migration_types
+from alembic import op
+
+from mealie.db.models._model_utils import GUID
+
+# revision identifiers, used by Alembic.
+revision = "5ab195a474eb"
+down_revision = "16160bf731a0"
+branch_labels = None
+depends_on = None
+
+
+class SqlAlchemyBase(DeclarativeBase):  # Declarative base local to this migration; the classes below map onto it
+    pass
+
+
+# Intermediate table definitions
+class RecipeModel(SqlAlchemyBase):  # Minimal mapping of "recipes": the key plus the columns used by do_data_migration
+    __tablename__ = "recipes"
+
+    id: Mapped[GUID] = mapped_column(GUID, primary_key=True, default=GUID.generate)
+    name: Mapped[str] = mapped_column(sa.String, nullable=False)  # source text for name_normalized
+    description: Mapped[str | None] = mapped_column(sa.String)  # source text for description_normalized
+    name_normalized: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)  # written by do_data_migration
+    description_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)  # written by do_data_migration
+
+
+class RecipeIngredient(SqlAlchemyBase):  # Minimal mapping of "recipes_ingredients" for do_data_migration
+    __tablename__ = "recipes_ingredients"
+
+    id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
+    note: Mapped[str | None] = mapped_column(sa.String)  # source text for note_normalized
+    original_text: Mapped[str | None] = mapped_column(sa.String)  # source text for original_text_normalized
+    note_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)  # written by do_data_migration
+    original_text_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)  # written by do_data_migration
+
+
+def do_data_migration():
+    bind = op.get_bind()
+    session = orm.Session(bind=bind)
+
+    recipes = session.execute(select(RecipeModel)).scalars().all()
+    ingredients = session.execute(select(RecipeIngredient)).scalars().all()
+    for recipe in recipes:
+        if recipe.name is not None:
+            recipe.name_normalized = unidecode(recipe.name).lower().strip()
+
+        if recipe.description is not None:
+            recipe.description_normalized = unidecode(recipe.description).lower().strip()
+        session.add(recipe)
+
+    for ingredient in ingredients:
+        if ingredient.note is not None:
+            ingredient.note_normalized = unidecode(ingredient.note).lower().strip()
+
+        if ingredient.original_text is not None:
+            ingredient.original_text_normalized = unidecode(ingredient.original_text).lower().strip()
+        session.add(ingredient)
+    session.commit()
+
+
+def upgrade():  # Add normalized search columns and indexes, backfill them, then make recipes.name_normalized NOT NULL
+    # ### commands auto generated by Alembic - please adjust! ###
+
+    # Add the new columns as nullable first, since existing rows have no normalized values yet
+    op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=True))
+    op.add_column("recipes", sa.Column("description_normalized", sa.String(), nullable=True))
+    op.drop_index("ix_recipes_description", table_name="recipes")  # raw-text indexes are replaced by the ones below
+    op.drop_index("ix_recipes_name", table_name="recipes")
+    op.create_index(op.f("ix_recipes_description_normalized"), "recipes", ["description_normalized"], unique=False)
+    op.create_index(op.f("ix_recipes_name_normalized"), "recipes", ["name_normalized"], unique=False)
+    op.add_column("recipes_ingredients", sa.Column("note_normalized", sa.String(), nullable=True))
+    op.add_column("recipes_ingredients", sa.Column("original_text_normalized", sa.String(), nullable=True))
+    op.drop_index("ix_recipes_ingredients_note", table_name="recipes_ingredients")  # same swap for ingredients
+    op.drop_index("ix_recipes_ingredients_original_text", table_name="recipes_ingredients")
+    op.create_index(
+        op.f("ix_recipes_ingredients_note_normalized"), "recipes_ingredients", ["note_normalized"], unique=False
+    )
+    op.create_index(
+        op.f("ix_recipes_ingredients_original_text_normalized"),
+        "recipes_ingredients",
+        ["original_text_normalized"],
+        unique=False,
+    )
+    do_data_migration()  # backfill the new columns for all existing rows
+    # Make recipes.name_normalized not nullable now that the column should be filled for all rows
+    with op.batch_alter_table("recipes", schema=None) as batch_op:  # NOTE(review): batch mode presumably for SQLite - confirm
+        batch_op.alter_column("name_normalized", nullable=False, existing_type=sa.String())
+    # ### end Alembic commands ###
+
+
+def downgrade():  # Reverse of upgrade(): drop the normalized indexes/columns and recreate the raw-text indexes
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f("ix_recipes_ingredients_original_text_normalized"), table_name="recipes_ingredients")
+    op.drop_index(op.f("ix_recipes_ingredients_note_normalized"), table_name="recipes_ingredients")
+    op.create_index("ix_recipes_ingredients_original_text", "recipes_ingredients", ["original_text"], unique=False)
+    op.create_index("ix_recipes_ingredients_note", "recipes_ingredients", ["note"], unique=False)
+    op.drop_column("recipes_ingredients", "original_text_normalized")  # the normalized data is discarded here
+    op.drop_column("recipes_ingredients", "note_normalized")
+    op.drop_index(op.f("ix_recipes_name_normalized"), table_name="recipes")
+    op.drop_index(op.f("ix_recipes_description_normalized"), table_name="recipes")
+    op.create_index("ix_recipes_name", "recipes", ["name"], unique=False)
+    op.create_index("ix_recipes_description", "recipes", ["description"], unique=False)
+    op.drop_column("recipes", "description_normalized")  # the normalized data is discarded here
+    op.drop_column("recipes", "name_normalized")
+    # ### end Alembic commands ###
--- a/mealie/db/models/recipe/ingredient.py
+++ b/mealie/db/models/recipe/ingredient.py
@ -1,7 +1,8 @@
 from typing import TYPE_CHECKING

-from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, orm
+from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm
 from sqlalchemy.orm import Mapped, mapped_column
+from text_unidecode import unidecode

 from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase
 from mealie.db.models.labels import MultiPurposeLabel
@ -63,7 +64,7 @@ class RecipeIngredient(SqlAlchemyBase, BaseMixins):
    recipe_id: Mapped[GUID | None] = mapped_column(GUID, ForeignKey("recipes.id"))

    title: Mapped[str | None] = mapped_column(String)  # Section Header - Shows if Present
-    note: Mapped[str | None] = mapped_column(String, index=True)  # Force Show Text - Overrides Concat
+    note: Mapped[str | None] = mapped_column(String)  # Force Show Text - Overrides Concat

    # Scaling Items
    unit_id: Mapped[GUID | None] = mapped_column(GUID, ForeignKey("ingredient_units.id"), index=True)
@ -73,10 +74,35 @@ class RecipeIngredient(SqlAlchemyBase, BaseMixins):
    food: Mapped[IngredientFoodModel | None] = orm.relationship(IngredientFoodModel, uselist=False)
    quantity: Mapped[float | None] = mapped_column(Float)

-    original_text: Mapped[str | None] = mapped_column(String, index=True)
+    original_text: Mapped[str | None] = mapped_column(String)

    reference_id: Mapped[GUID | None] = mapped_column(GUID)  # Reference Links

+    # Automatically updated by sqlalchemy event, do not write to this manually
+    note_normalized: Mapped[str | None] = mapped_column(String, index=True)
+    original_text_normalized: Mapped[str | None] = mapped_column(String, index=True)
+
    @auto_init()
-    def __init__(self, **_) -> None:
-        pass
+    def __init__(self, note: str | None = None, orginal_text: str | None = None, **_) -> None:
+        # SQLAlchemy events do not seem to register things that are set during auto_init
+        if note is not None:
+            self.note_normalized = unidecode(note).lower().strip()
+
+        if orginal_text is not None:
+            self.orginal_text = unidecode(orginal_text).lower().strip()
+
+
+@event.listens_for(RecipeIngredient.note, "set")
+def receive_note(target: RecipeIngredient, value: str, oldvalue, initiator):
+    if value is not None:
+        target.name_normalized = unidecode(value).lower().strip()
+    else:
+        target.name_normalized = None
+
+
+@event.listens_for(RecipeIngredient.original_text, "set")
+def receive_original_text(target: RecipeIngredient, value: str, oldvalue, initiator):
+    if value is not None:
+        target.original_text_normalized = unidecode(value).lower().strip()
+    else:
+        target.original_text_normalized = None
--- a/mealie/db/models/recipe/recipe.py
+++ b/mealie/db/models/recipe/recipe.py
@ -3,8 +3,10 @@ from typing import TYPE_CHECKING

 import sqlalchemy as sa
 import sqlalchemy.orm as orm
+from sqlalchemy import event
 from sqlalchemy.ext.orderinglist import ordering_list
 from sqlalchemy.orm import Mapped, mapped_column, validates
+from text_unidecode import unidecode

 from mealie.db.models._model_utils.guid import GUID

@ -54,8 +56,9 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
    )

    # General Recipe Properties
-    name: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)
-    description: Mapped[str | None] = mapped_column(sa.String, index=True)
+    name: Mapped[str] = mapped_column(sa.String, nullable=False)
+    description: Mapped[str | None] = mapped_column(sa.String)
+
    image: Mapped[str | None] = mapped_column(sa.String)

    # Time Related Properties
@ -127,6 +130,10 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
        cascade="all, delete-orphan",
    )

+    # Automatically updated by sqlalchemy event, do not write to this manually
+    name_normalized: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)
+    description_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
+
    class Config:
        get_attr = "slug"
        exclude = {
@ -150,6 +157,8 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
    def __init__(
        self,
        session,
+        name: str | None = None,
+        description: str | None = None,
        assets: list | None = None,
        notes: list[dict] | None = None,
        nutrition: dict | None = None,
@ -175,3 +184,23 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
            self.notes = [Note(**n) for n in notes]

        self.date_updated = datetime.now()
+
+        # SQLAlchemy events do not seem to register things that are set during auto_init
+        if name is not None:
+            self.name_normalized = unidecode(name).lower().strip()
+
+        if description is not None:
+            self.description_normalized = unidecode(description).lower().strip()
+
+
+@event.listens_for(RecipeModel.name, "set")
+def receive_name(target: RecipeModel, value: str, oldvalue, initiator):
+    target.name_normalized = unidecode(value).lower().strip()
+
+
+@event.listens_for(RecipeModel.description, "set")
+def receive_description(target: RecipeModel, value: str, oldvalue, initiator):
+    if value is not None:
+        target.description_normalized = unidecode(value).lower().strip()
+    else:
+        target.description_normalized = None
--- a/mealie/repos/repository_recipes.py
+++ b/mealie/repos/repository_recipes.py
@ -7,6 +7,7 @@ from slugify import slugify
 from sqlalchemy import Select, and_, desc, func, or_, select
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import joinedload
+from text_unidecode import unidecode

 from mealie.db.models.recipe.category import Category
 from mealie.db.models.recipe.ingredient import RecipeIngredient
@ -150,12 +151,16 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
        return ids + additional_ids

    def _add_search_to_query(self, query: Select, search: str) -> Select:
+        normalized_search = unidecode(search).lower().strip()
        # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out
        # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is
        ingredient_ids = (
            self.session.execute(
                select(RecipeIngredient.id).filter(
-                    or_(RecipeIngredient.note.ilike(f"%{search}%"), RecipeIngredient.original_text.ilike(f"%{search}%"))
+                    or_(
+                        RecipeIngredient.note_normalized.like(f"%{normalized_search}%"),
+                        RecipeIngredient.original_text_normalized.like(f"%{normalized_search}%"),
+                    )
                )
            )
            .scalars()
@ -164,11 +169,11 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):

        q = query.filter(
            or_(
-                RecipeModel.name.ilike(f"%{search}%"),
-                RecipeModel.description.ilike(f"%{search}%"),
+                RecipeModel.name_normalized.like(f"%{normalized_search}%"),
+                RecipeModel.description_normalized.like(f"%{normalized_search}%"),
                RecipeModel.recipe_ingredient.any(RecipeIngredient.id.in_(ingredient_ids)),
            )
-        ).order_by(desc(RecipeModel.name.ilike(f"%{search}%")))
+        ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%")))
        return q

    def page_all(
--- a/tests/unit_tests/repository_tests/test_recipe_repository.py
+++ b/tests/unit_tests/repository_tests/test_recipe_repository.py
@ -463,6 +463,12 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
            group_id=unique_user.group_id,
            name=name_3,
        ),
+        # Test diacritics
+        Recipe(
+            user_id=unique_user.user_id,
+            group_id=unique_user.group_id,
+            name="Rátàtôuile",
+        ),
    ]

    for recipe in recipes:
@ -494,3 +500,9 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
    assert len(ordered_result) == 2
    assert ordered_result[0].name == name_3
    assert ordered_result[1].name == name_1
+
+    # Test string normalization
+    normalized_result = database.recipes.page_all(pagination_query, search="ratat").items
+    print([r.name for r in normalized_result])
+    assert len(normalized_result) == 1
+    assert normalized_result[0].name == "Rátàtôuile"