diff --git a/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py b/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py index 2cecd66cc196..773aaf5c1212 100644 --- a/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py +++ b/alembic/versions/2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties.py @@ -7,12 +7,11 @@ Create Date: 2023-02-14 20:45:41.102571 """ import sqlalchemy as sa from sqlalchemy import orm, select -from sqlalchemy.orm import Mapped, mapped_column, DeclarativeBase +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column from text_unidecode import unidecode import mealie.db.migration_types from alembic import op - from mealie.db.models._model_utils import GUID # revision identifiers, used by Alembic. @@ -52,30 +51,46 @@ def do_data_migration(): session = orm.Session(bind=bind) recipes = session.execute(select(RecipeModel)).scalars().all() - ingredients = session.execute(select(RecipeIngredient)).scalars().all() for recipe in recipes: if recipe.name is not None: - recipe.name_normalized = unidecode(recipe.name).lower().strip() + session.execute( + sa.text( + f"UPDATE {RecipeModel.__tablename__} SET name_normalized=:name_normalized WHERE id=:id" + ).bindparams(name_normalized=unidecode(recipe.name).lower().strip(), id=recipe.id) + ) if recipe.description is not None: - recipe.description_normalized = unidecode(recipe.description).lower().strip() - session.add(recipe) + session.execute( + sa.text( + f"UPDATE {RecipeModel.__tablename__} SET description_normalized=:description_normalized WHERE id=:id" + ).bindparams(description_normalized=unidecode(recipe.description).lower().strip(), id=recipe.id) + ) + ingredients = session.execute(select(RecipeIngredient)).scalars().all() for ingredient in ingredients: if ingredient.note is not None: - ingredient.note_normalized = unidecode(ingredient.note).lower().strip() + session.execute( + 
sa.text( + f"UPDATE {RecipeIngredient.__tablename__} SET note_normalized=:note_normalized WHERE id=:id" + ).bindparams(note_normalized=unidecode(ingredient.note).lower().strip(), id=ingredient.id) + ) if ingredient.original_text is not None: - ingredient.original_text_normalized = unidecode(ingredient.original_text).lower().strip() - session.add(ingredient) + session.execute( + sa.text( + f"UPDATE {RecipeIngredient.__tablename__} SET original_text_normalized=:original_text_normalized WHERE id=:id" + ).bindparams( + original_text_normalized=unidecode(ingredient.original_text).lower().strip(), id=ingredient.id + ) + ) session.commit() def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - # Set column to nullable first, since we do not have values here yet - op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=True)) + # Set column default first, since we do not have values here yet + op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=False, server_default="")) op.add_column("recipes", sa.Column("description_normalized", sa.String(), nullable=True)) op.drop_index("ix_recipes_description", table_name="recipes") op.drop_index("ix_recipes_name", table_name="recipes") @@ -95,9 +110,9 @@ def upgrade(): unique=False, ) do_data_migration() - # Make recipes.name_normalized not nullable now that column should be filled for all rows + # Remove server default now that column should be filled for all rows with op.batch_alter_table("recipes", schema=None) as batch_op: - batch_op.alter_column("name_normalized", nullable=False, existing_type=sa.String()) + batch_op.alter_column("name_normalized", existing_type=sa.String(), server_default=None) # ### end Alembic commands ### diff --git a/alembic/versions/2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings.py b/alembic/versions/2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings.py index da426cb7a04b..edca93c2b6a4 100644 --- 
a/alembic/versions/2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings.py +++ b/alembic/versions/2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings.py @@ -24,10 +24,10 @@ depends_on = None def populate_shopping_lists_multi_purpose_labels(shopping_lists_multi_purpose_labels_table: sa.Table, session: Session): shopping_lists = session.query(ShoppingList).all() - labels = session.query(MultiPurposeLabel).all() shopping_lists_labels_data: list[dict] = [] for shopping_list in shopping_lists: + labels = session.query(MultiPurposeLabel).filter(MultiPurposeLabel.group_id == shopping_list.group_id).all() for i, label in enumerate(labels): shopping_lists_labels_data.append( {"id": uuid4(), "shopping_list_id": shopping_list.id, "label_id": label.id, "position": i} diff --git a/alembic/versions/2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug.py b/alembic/versions/2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug.py index 9d846adf4dbc..be341fde57f4 100644 --- a/alembic/versions/2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug.py +++ b/alembic/versions/2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug.py @@ -24,17 +24,22 @@ def populate_group_slugs(session: Session): seen_slugs: set[str] = set() for group in groups: original_name = group.name + new_name = original_name attempts = 0 while True: - slug = slugify(group.name) + slug = slugify(new_name) if slug not in seen_slugs: break attempts += 1 - group.name = f"{original_name} ({attempts})" + new_name = f"{original_name} ({attempts})" seen_slugs.add(slug) - group.slug = slug + session.execute( + sa.text(f"UPDATE {Group.__tablename__} SET name=:name, slug=:slug WHERE id=:id").bindparams( + name=new_name, slug=slug, id=group.id + ) + ) session.commit() diff --git a/dev/code-generation/utils/anonymize_backups.py b/dev/code-generation/utils/anonymize_backups.py new file mode 100644 index 000000000000..58ee02b7d06b --- /dev/null +++ b/dev/code-generation/utils/anonymize_backups.py @@ -0,0 +1,74 @@ 
+import json +import logging +import random +import string +from datetime import datetime +from uuid import UUID + +logger = logging.getLogger("anonymize_backups") + + +def is_uuid4(value: str): + try: + UUID(value) + return True + except ValueError: + return False + + +def is_iso_datetime(value: str): + try: + datetime.fromisoformat(value) + return True + except ValueError: + return False + + +def random_string(length=10): + return "".join(random.choice(string.ascii_lowercase) for _ in range(length)) + + +def clean_value(value): + try: + match value: + # preserve non-strings + case int(value) | float(value): + return value + case None: + return value + # preserve UUIDs and datetimes + case str(value) if is_uuid4(value) or is_iso_datetime(value): + return value + # randomize strings + case str(value): + return random_string() + case _: + pass + + except Exception as e: + logger.exception(e) + + logger.error(f"Failed to anonymize value: {value}") + return value + + +def walk_data_and_anonymize(data): + for k, v in data.items(): + if isinstance(v, list): + for item in v: + walk_data_and_anonymize(item) + else: + # preserve alembic version number and enums + if k in ["auth_method", "version_num"]: + continue + + data[k] = clean_value(v) + + +def anonymize_database_json(input_filepath: str, output_filepath: str): + with open(input_filepath) as f: + data = json.load(f) + + walk_data_and_anonymize(data) + with open(output_filepath, "w") as f: + json.dump(data, f) diff --git a/mealie/db/fixes/fix_migration_data.py b/mealie/db/fixes/fix_migration_data.py new file mode 100644 index 000000000000..0b74133a9c19 --- /dev/null +++ b/mealie/db/fixes/fix_migration_data.py @@ -0,0 +1,150 @@ +from uuid import uuid4 + +from slugify import slugify +from sqlalchemy.orm import Session + +from mealie.core import root_logger +from mealie.db.models.group.group import Group +from mealie.db.models.group.shopping_list import ShoppingList, ShoppingListMultiPurposeLabel +from 
mealie.db.models.labels import MultiPurposeLabel +from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel +from mealie.db.models.recipe.recipe import RecipeModel + +logger = root_logger.get_logger("init_db") + + +def fix_recipe_normalized_search_properties(session: Session): + recipes = session.query(RecipeModel).all() + recipes_fixed = False + + for recipe in recipes: + add_to_session = False + if recipe.name and not recipe.name_normalized: + recipe.name_normalized = RecipeModel.normalize(recipe.name) + add_to_session = True + if recipe.description and not recipe.description_normalized: + recipe.description_normalized = RecipeModel.normalize(recipe.description) + add_to_session = True + + for ingredient in recipe.recipe_ingredient: + if ingredient.note and not ingredient.note_normalized: + ingredient.note_normalized = RecipeModel.normalize(ingredient.note) + add_to_session = True + if ingredient.original_text and not ingredient.original_text_normalized: + ingredient.original_text_normalized = RecipeModel.normalize(ingredient.original_text) + add_to_session = True + + if add_to_session: + recipes_fixed = True + session.add(recipe) + + if recipes_fixed: + logger.info("Updating recipe normalized search properties") + session.commit() + + +def fix_shopping_list_label_settings(session: Session): + shopping_lists = session.query(ShoppingList).all() + labels = session.query(MultiPurposeLabel).all() + label_settings_fixed = False + + for shopping_list in shopping_lists: + labels_by_id = {label.id: label for label in labels if label.group_id == shopping_list.group_id} + for label_setting in shopping_list.label_settings: + if not labels_by_id.pop(label_setting.label_id, None): + # label setting is no longer valid, so delete it + session.delete(label_setting) + label_settings_fixed = True + + if not labels_by_id: + # all labels are accounted for, so we don't need to add any + continue + + label_settings_fixed = True + for i, label in 
enumerate(labels_by_id.values()): + new_label_setting = ShoppingListMultiPurposeLabel( + id=uuid4(), + shopping_list_id=shopping_list.id, + label_id=label.id, + position=i + len(shopping_list.label_settings), + ) + + session.add(new_label_setting) + + if label_settings_fixed: + logger.info("Fixing shopping list label settings") + session.commit() + + +def fix_group_slugs(session: Session): + groups = session.query(Group).all() + seen_slugs: set[str] = set() + groups_fixed = False + + for group in groups: + if not group.slug: + original_name = group.name + new_name = original_name + attempts = 0 + while True: + slug = slugify(new_name) + if slug not in seen_slugs: + break + + attempts += 1 + new_name = f"{original_name} ({attempts})" + seen_slugs.add(slug) + groups_fixed = True + group.name = new_name + group.slug = slug + + if groups_fixed: + logger.info("Adding missing group slugs") + session.commit() + + +def fix_normalized_unit_and_food_names(session: Session): + units = session.query(IngredientUnitModel).all() + units_fixed = False + + for unit in units: + add_to_session = False + if unit.name and not unit.name_normalized: + unit.name_normalized = IngredientUnitModel.normalize(unit.name) + add_to_session = True + if unit.abbreviation and not unit.abbreviation_normalized: + unit.abbreviation_normalized = IngredientUnitModel.normalize(unit.abbreviation) + add_to_session = True + + if add_to_session: + units_fixed = True + session.add(unit) + + if units_fixed: + logger.info("Updating unit normalized search properties") + session.commit() + + foods = session.query(IngredientFoodModel).all() + foods_fixed = False + + for food in foods: + add_to_session = False + if food.name and not food.name_normalized: + food.name_normalized = IngredientFoodModel.normalize(food.name) + add_to_session = True + + if add_to_session: + foods_fixed = True + session.add(food) + + if foods_fixed: + logger.info("Updating food normalized search properties") + session.commit() + + +def 
fix_migration_data(session: Session): + logger.info("Checking for migration data fixes") + fix_recipe_normalized_search_properties(session) + fix_shopping_list_label_settings(session) + fix_group_slugs(session) + fix_normalized_unit_and_food_names(session) diff --git a/mealie/db/init_db.py b/mealie/db/init_db.py index 3ac888c18110..1fcefe1c21bf 100644 --- a/mealie/db/init_db.py +++ b/mealie/db/init_db.py @@ -11,6 +11,7 @@ from mealie.core import root_logger from mealie.core.config import get_app_settings from mealie.db.db_setup import session_context from mealie.db.fixes.fix_group_with_no_name import fix_group_with_no_name +from mealie.db.fixes.fix_migration_data import fix_migration_data from mealie.db.fixes.fix_slug_foods import fix_slug_food_names from mealie.repos.all_repositories import get_repositories from mealie.repos.repository_factory import AllRepositories @@ -97,6 +98,9 @@ def main(): session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) db = get_repositories(session) + safe_try(lambda: fix_migration_data(session)) + safe_try(lambda: fix_slug_food_names(db)) + safe_try(lambda: fix_group_with_no_name(session)) if db.users.get_all(): logger.debug("Database exists") @@ -104,9 +108,6 @@ def main(): logger.info("Database contains no users, initializing...") init_db(db) - safe_try(lambda: fix_slug_food_names(db)) - safe_try(lambda: fix_group_with_no_name(session)) - if __name__ == "__main__": main() diff --git a/mealie/db/models/_model_utils/guid.py b/mealie/db/models/_model_utils/guid.py index f8b573183107..813bfdda635f 100644 --- a/mealie/db/models/_model_utils/guid.py +++ b/mealie/db/models/_model_utils/guid.py @@ -1,5 +1,7 @@ import uuid +from typing import Any +from sqlalchemy import Dialect from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.types import CHAR, TypeDecorator @@ -17,13 +19,8 @@ class GUID(TypeDecorator): def generate(): return uuid.uuid4() - def load_dialect_impl(self, dialect): - if dialect.name == "postgresql": - 
return dialect.type_descriptor(UUID()) - else: - return dialect.type_descriptor(CHAR(32)) - - def process_bind_param(self, value, dialect): + @staticmethod + def convert_value_to_guid(value: Any, dialect: Dialect) -> str | None: if value is None: return value elif dialect.name == "postgresql": @@ -35,7 +32,25 @@ class GUID(TypeDecorator): # hexstring return "%.32x" % value.int + def load_dialect_impl(self, dialect): + if dialect.name == "postgresql": + return dialect.type_descriptor(UUID()) + else: + return dialect.type_descriptor(CHAR(32)) + + def process_bind_param(self, value, dialect): + return self.convert_value_to_guid(value, dialect) + + def _uuid_value(self, value): + if value is None: + return value + else: + if not isinstance(value, uuid.UUID): + value = uuid.UUID(value) + return value + def process_result_value(self, value, dialect): - if value is not None and not isinstance(value, uuid.UUID): - value = uuid.UUID(value) - return value + return self._uuid_value(value) + + def sort_key_function(self, value): + return self._uuid_value(value) diff --git a/mealie/services/backups_v2/alchemy_exporter.py b/mealie/services/backups_v2/alchemy_exporter.py index eba567cccb3d..5ad2ac20c5a3 100644 --- a/mealie/services/backups_v2/alchemy_exporter.py +++ b/mealie/services/backups_v2/alchemy_exporter.py @@ -1,4 +1,5 @@ import datetime +import uuid from os import path from pathlib import Path @@ -10,6 +11,8 @@ from sqlalchemy.orm import sessionmaker from alembic import command from alembic.config import Config +from mealie.db import init_db +from mealie.db.models._model_utils import GUID from mealie.services._base_service import BaseService PROJECT_DIR = Path(__file__).parent.parent.parent.parent @@ -38,23 +41,33 @@ class AlchemyExporter(BaseService): self.session_maker = sessionmaker(bind=self.engine) @staticmethod - def convert_to_datetime(data: dict) -> dict: + def is_uuid(value: str) -> bool: + try: + uuid.UUID(value) + return True + except ValueError: + return 
False + + def convert_types(self, data: dict) -> dict: """ - walks the dictionary to convert all things that look like timestamps to datetime objects + walks the dictionary to restore all things that look like string representations of their complex types used in the context of reading a json file into a database via SQLAlchemy. """ for key, value in data.items(): if isinstance(value, dict): - data = AlchemyExporter.convert_to_datetime(value) + data[key] = self.convert_types(value) elif isinstance(value, list): # assume that this is a list of dictionaries - data[key] = [AlchemyExporter.convert_to_datetime(item) for item in value] + data[key] = [self.convert_types(item) for item in value] elif isinstance(value, str): - if key in AlchemyExporter.look_for_datetime: - data[key] = AlchemyExporter.DateTimeParser(dt=value).dt - if key in AlchemyExporter.look_for_date: - data[key] = AlchemyExporter.DateTimeParser(date=value).date - if key in AlchemyExporter.look_for_time: - data[key] = AlchemyExporter.DateTimeParser(time=value).time + if self.is_uuid(value): + # convert the data to the current database's native GUID type + data[key] = GUID.convert_value_to_guid(value, self.engine.dialect) + if key in self.look_for_datetime: + data[key] = self.DateTimeParser(dt=value).dt + if key in self.look_for_date: + data[key] = self.DateTimeParser(date=value).date + if key in self.look_for_time: + data[key] = self.DateTimeParser(time=value).time return data def dump_schema(self) -> dict: @@ -105,7 +118,7 @@ class AlchemyExporter(BaseService): del db_dump["alembic_version"] """Restores all data from dictionary into the database""" with self.engine.begin() as connection: - data = AlchemyExporter.convert_to_datetime(db_dump) + data = self.convert_types(db_dump) self.meta.reflect(bind=self.engine) for table_name, rows in data.items(): @@ -139,8 +152,8 @@ SELECT SETVAL('shopping_list_item_extras_id_seq', (SELECT MAX(id) FROM shopping_ ) ) - # Run all migrations up to current version - 
command.upgrade(alembic_cfg, "head") + # Re-init database to finish migrations + init_db.main() def drop_all(self) -> None: """Drops all data from the database""" diff --git a/mealie/services/backups_v2/backup_v2.py b/mealie/services/backups_v2/backup_v2.py index d2c1d87fd4fc..f5b6b2b814c5 100644 --- a/mealie/services/backups_v2/backup_v2.py +++ b/mealie/services/backups_v2/backup_v2.py @@ -83,7 +83,7 @@ class BackupV2(BaseService): # Validation if not contents.validate(): self.logger.error( - "Invalid backup file. file does not contain required elements (data directory and database.json" + "Invalid backup file. file does not contain required elements (data directory and database.json)" ) raise ValueError("Invalid backup file") diff --git a/tests/data/__init__.py b/tests/data/__init__.py index 588d2c854197..d24a7f9509d5 100644 --- a/tests/data/__init__.py +++ b/tests/data/__init__.py @@ -4,6 +4,15 @@ CWD = Path(__file__).parent locale_dir = CWD / "locale" +backup_version_44e8d670719d = CWD / "backups/backup_version_44e8d670719d.zip" +"""44e8d670719d: add extras to shopping lists, list items, and ingredient foods""" + +backup_version_ba1e4a6cfe99 = CWD / "backups/backup_version_ba1e4a6cfe99.zip" +"""ba1e4a6cfe99: added plural names and alias tables for foods and units""" + +backup_version_bcfdad6b7355 = CWD / "backups/backup_version_bcfdad6b7355.zip" +"""bcfdad6b7355: remove tool name and slug unique contraints""" + migrations_paprika = CWD / "migrations/paprika.zip" migrations_chowdown = CWD / "migrations/chowdown.zip" diff --git a/tests/data/backups/backup_version_44e8d670719d.zip b/tests/data/backups/backup_version_44e8d670719d.zip new file mode 100644 index 000000000000..5a707a4f5c33 Binary files /dev/null and b/tests/data/backups/backup_version_44e8d670719d.zip differ diff --git a/tests/data/backups/backup_version_ba1e4a6cfe99.zip b/tests/data/backups/backup_version_ba1e4a6cfe99.zip new file mode 100644 index 000000000000..f8999a5fb649 Binary files /dev/null 
and b/tests/data/backups/backup_version_ba1e4a6cfe99.zip differ diff --git a/tests/data/backups/backup_version_bcfdad6b7355.zip b/tests/data/backups/backup_version_bcfdad6b7355.zip new file mode 100644 index 000000000000..24b6b513e8d0 Binary files /dev/null and b/tests/data/backups/backup_version_bcfdad6b7355.zip differ diff --git a/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py b/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py index ddee9a46bdb1..453755246d61 100644 --- a/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py +++ b/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py @@ -1,8 +1,18 @@ import filecmp from pathlib import Path -from typing import Any +from typing import Any, cast +import pytest +from sqlalchemy.orm import Session + +import tests.data as test_data from mealie.core.config import get_app_settings +from mealie.db.db_setup import session_context +from mealie.db.models.group import Group +from mealie.db.models.group.shopping_list import ShoppingList +from mealie.db.models.labels import MultiPurposeLabel +from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel +from mealie.db.models.recipe.recipe import RecipeModel from mealie.services.backups_v2.alchemy_exporter import AlchemyExporter from mealie.services.backups_v2.backup_file import BackupFile from mealie.services.backups_v2.backup_v2 import BackupV2 @@ -56,3 +66,88 @@ def test_database_restore(): for s1, s2 in zip(snapshop_1, snapshop_2): assert snapshop_1[s1].sort(key=dict_sorter) == snapshop_2[s2].sort(key=dict_sorter) + + +@pytest.mark.parametrize( + "backup_path", + [ + test_data.backup_version_44e8d670719d, + test_data.backup_version_ba1e4a6cfe99, + test_data.backup_version_bcfdad6b7355, + ], + ids=[ + "44e8d670719d: add extras to shopping lists, list items, and ingredient foods", + "ba1e4a6cfe99: added plural names and alias tables for foods and units", + "bcfdad6b7355: remove tool name and 
slug unique contraints", + ], +) +def test_database_restore_data(backup_path: Path): + """ + This tests real user backups to make sure the data is restored correctly. The data has been anonymized, but + relationships and data types should be preserved. + + This test should verify all migrations that do some sort of database manipulation (e.g. populating a new column). + If a new migration is added that does any sort of data manipulation, this test should be updated. + """ + + settings = get_app_settings() + backup_v2 = BackupV2(settings.DB_URL) + + # create a backup of the existing data so we can restore it later + original_data_backup = backup_v2.backup() + + try: + assert backup_path.exists() + backup_v2.restore(backup_path) + + # make sure migrations populated data successfully + with session_context() as session: + session = cast(Session, session) + + groups = session.query(Group).all() + recipes = session.query(RecipeModel).all() + shopping_lists = session.query(ShoppingList).all() + labels = session.query(MultiPurposeLabel).all() + + foods = session.query(IngredientFoodModel).all() + units = session.query(IngredientUnitModel).all() + + # 2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties + for recipe in recipes: + if recipe.name: + assert recipe.name_normalized + if recipe.description: + assert recipe.description_normalized + + for ingredient in recipe.recipe_ingredient: + if ingredient.note: + assert ingredient.note_normalized + if ingredient.original_text: + assert ingredient.original_text_normalized + + # 2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings + for shopping_list in shopping_lists: + group_labels = [label for label in labels if label.group_id == shopping_list.group_id] + assert len(shopping_list.label_settings) == len(group_labels) + for label_setting, label in zip( + sorted(shopping_list.label_settings, key=lambda x: x.label.id), + sorted(group_labels, key=lambda x: x.id), + strict=True, + ): + assert 
label_setting.label == label + + # 2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug + for group in groups: + assert group.slug + + # 2023-09-01-14.55.42_0341b154f79a_added_normalized_unit_and_food_names + for food in foods: + if food.name: + assert food.name_normalized + + for unit in units: + assert unit.name_normalized + if unit.abbreviation: + assert unit.abbreviation_normalized + finally: + backup_v2.restore(original_data_backup)