feat: Migrate from Copy Me That (#2212)

* implemented copymethat migration

* added migration tree

* added translation support

* genericized example jpgs

* added test data

* fixed test archive

* switched recipe create to service
added test for timeline event creation

* linting

* lxml go brrr
This commit is contained in:
Michael Genson 2023-03-12 15:37:24 -05:00 committed by GitHub
parent 3ce8fa9492
commit 3118b0e423
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 288 additions and 36 deletions

1
.gitignore vendored
View File

@ -12,6 +12,7 @@ docs/site/
frontend/dist/
dev/code-generation/generated/*
dev/data/mealie.db-journal
dev/data/backups/*
dev/data/debug/*
dev/data/img/*

View File

@ -283,17 +283,34 @@
"meal-plan-settings": "Meal Plan Settings"
},
"migration": {
"migration-data-removed": "Migration data removed",
"new-migration": "New Migration",
"no-file-selected": "No File Selected",
"no-migration-data-available": "No Migration Data Available",
"previous-migrations": "Previous Migrations",
"recipe-migration": "Recipe Migration",
"chowdown": {
"description": "Migrate data from Chowdown",
"description-long": "Mealie natively supports the chowdown repository format. Download the code repository as a .zip file and upload it below.",
"title": "Chowdown"
},
"migration-data-removed": "Migration data removed",
"nextcloud": {
"description": "Migrate data from a Nextcloud Cookbook instance",
"description-long": "Nextcloud recipes can be imported from a zip file that contains the data stored in Nextcloud. See the example folder structure below to ensure your recipes are able to be imported.",
"title": "Nextcloud Cookbook"
},
"no-migration-data-available": "No Migration Data Available",
"recipe-migration": "Recipe Migration"
"copymethat": {
"description-long": "Mealie can import recipes from Copy Me That. Export your recipes in HTML format, then upload the .zip below.",
"title": "Copy Me That Recipe Manager"
},
"paprika": {
"description-long": "Mealie can import recipes from the Paprika application. Export your recipes from paprika, rename the export extension to .zip and upload it below.",
"title": "Paprika Recipe Manager"
},
"mealie-pre-v1": {
"description-long": "Mealie can import recipes from the Mealie application from a pre v1.0 release. Export your recipes from your old instance, and upload the zip file below. Note that only recipes can be imported from the export.",
"title": "Mealie Pre v1.0"
}
},
"new-recipe": {
"bulk-add": "Bulk Add",

View File

@ -6,7 +6,7 @@
*/
export type WebhookType = "mealplan";
export type SupportedMigrations = "nextcloud" | "chowdown" | "paprika" | "mealie_alpha";
export type SupportedMigrations = "nextcloud" | "chowdown" | "copymethat" | "paprika" | "mealie_alpha";
export interface CreateGroupPreferences {
privateGroup?: boolean;

View File

@ -14,7 +14,7 @@
Mealie.
</BasePageTitle>
<v-container>
<BaseCardSectionTitle title="New Migration"> </BaseCardSectionTitle>
<BaseCardSectionTitle :title="$i18n.tc('migration.new-migration')"> </BaseCardSectionTitle>
<v-card outlined :loading="loading">
<v-card-title> Choose Migration Type </v-card-title>
<v-card-text v-if="content" class="pb-0">
@ -39,7 +39,7 @@
:text-btn="false"
@uploaded="setFileObject"
/>
{{ fileObject.name || "No file selected" }}
{{ fileObject.name || $i18n.tc('migration.no-file-selected') }}
</v-card-text>
<v-card-text>
@ -58,7 +58,7 @@
</v-card>
</v-container>
<v-container>
<BaseCardSectionTitle title="Previous Migrations"> </BaseCardSectionTitle>
<BaseCardSectionTitle :title="$i18n.tc('migration.previous-migrations')"> </BaseCardSectionTitle>
<ReportTable :items="reports" @delete="deleteReport" />
</v-container>
</v-container>
@ -74,13 +74,14 @@ import { SupportedMigrations } from "~/lib/api/types/group";
// Migration sources offered on this page. The values mirror the members of the
// imported `SupportedMigrations` union; note that the `mealie` key maps to the
// "mealie_alpha" value rather than to its own name.
const MIGRATIONS = {
nextcloud: "nextcloud",
chowdown: "chowdown",
copymethat: "copymethat",
paprika: "paprika",
mealie: "mealie_alpha",
};
export default defineComponent({
setup() {
const { $globals } = useContext();
const { $globals, i18n } = useContext();
const api = useUserApi();
@ -95,26 +96,30 @@ export default defineComponent({
const items: MenuItem[] = [
{
text: "Nextcloud",
text: i18n.tc("migration.nextcloud.title"),
value: MIGRATIONS.nextcloud,
},
{
text: "Chowdown",
text: i18n.tc("migration.chowdown.title"),
value: MIGRATIONS.chowdown,
},
{
text: "Paprika",
text: i18n.tc("migration.copymethat.title"),
value: MIGRATIONS.copymethat,
},
{
text: i18n.tc("migration.paprika.title"),
value: MIGRATIONS.paprika,
},
{
text: "Mealie",
text: i18n.tc("migration.mealie-pre-v1.title"),
value: MIGRATIONS.mealie,
},
];
const _content = {
[MIGRATIONS.nextcloud]: {
text: "Nextcloud recipes can be imported from a zip file that contains the data stored in Nextcloud. See the example folder structure below to ensure your recipes are able to be imported.",
text: i18n.tc("migration.nextcloud.description-long"),
tree: [
{
id: 1,
@ -146,7 +151,7 @@ export default defineComponent({
],
},
[MIGRATIONS.chowdown]: {
text: "Mealie natively supports the chowdown repository format. Download the code repository as a .zip file and upload it below",
text: i18n.tc("migration.chowdown.description-long"),
tree: [
{
id: 1,
@ -177,12 +182,35 @@ export default defineComponent({
},
],
},
[MIGRATIONS.copymethat]: {
text: i18n.tc("migration.copymethat.description-long"),
tree: [
{
id: 1,
icon: $globals.icons.zip,
name: "Copy_Me_That_20230306.zip",
children: [
{
id: 2,
name: "images",
icon: $globals.icons.folderOutline,
children: [
{ id: 3, name: "recipe_1_an5zy.jpg", icon: $globals.icons.fileImage },
{ id: 4, name: "recipe_2_82el8.jpg", icon: $globals.icons.fileImage },
{ id: 5, name: "recipe_3_j75qg.jpg", icon: $globals.icons.fileImage },
],
},
{ id: 6, name: "recipes.html", icon: $globals.icons.codeJson }
]
}
],
},
[MIGRATIONS.paprika]: {
text: "Mealie can import recipes from the Paprika application. Export your recipes from paprika, rename the export extension to .zip and upload it below.",
text: i18n.tc("migration.paprika.description-long"),
tree: false,
},
[MIGRATIONS.mealie]: {
text: "Mealie can import recipes from the Mealie application from a pre v1.0 release. Export your recipes from your old instance, and upload the zip file below. Note that only recipes can be imported from the export.",
text: i18n.tc("migration.mealie-pre-v1.description-long"),
tree: [
{
id: 1,

View File

@ -44,6 +44,6 @@ class GroupReportsController(BaseUserController):
def delete_one(self, item_id: UUID4):
try:
self.mixins.delete_one(item_id) # type: ignore
return SuccessResponse.respond(self.t("report-deleted"))
return SuccessResponse.respond(self.t("group.report-deleted"))
except Exception as ex:
raise HTTPException(500, ErrorResponse.respond("Failed to delete report")) from ex

View File

@ -12,6 +12,7 @@ from mealie.schema.reports.reports import ReportSummary
from mealie.services.migrations import (
BaseMigrator,
ChowdownMigrator,
CopyMeThatMigrator,
MealieAlphaMigrator,
NextcloudMigrator,
PaprikaMigrator,
@ -45,6 +46,7 @@ class GroupMigrationController(BaseUserController):
table: dict[SupportedMigrations, type[BaseMigrator]] = {
SupportedMigrations.chowdown: ChowdownMigrator,
SupportedMigrations.copymethat: CopyMeThatMigrator,
SupportedMigrations.mealie_alpha: MealieAlphaMigrator,
SupportedMigrations.nextcloud: NextcloudMigrator,
SupportedMigrations.paprika: PaprikaMigrator,

View File

@ -6,6 +6,7 @@ from mealie.schema._mealie import MealieModel
class SupportedMigrations(str, enum.Enum):
nextcloud = "nextcloud"
chowdown = "chowdown"
copymethat = "copymethat"
paprika = "paprika"
mealie_alpha = "mealie_alpha"

View File

@ -1,4 +1,5 @@
from .chowdown import *
from .copymethat import *
from .mealie_alpha import *
from .nextcloud import *
from .paprika import *

View File

@ -5,6 +5,7 @@ from uuid import UUID
from pydantic import UUID4
from mealie.core import root_logger
from mealie.core.exceptions import UnexpectedNone
from mealie.repos.all_repositories import AllRepositories
from mealie.schema.recipe import Recipe
from mealie.schema.recipe.recipe_settings import RecipeSettings
@ -16,6 +17,7 @@ from mealie.schema.reports.reports import (
ReportSummary,
ReportSummaryStatus,
)
from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.scraper import cleaner
from .._base_service import BaseService
@ -38,17 +40,27 @@ class BaseMigrator(BaseService):
self.archive = archive
self.db = db
self.session = session
self.user_id = user_id
self.group_id = group_id
self.add_migration_tag = add_migration_tag
user = db.users.get_one(user_id)
if not user:
raise UnexpectedNone(f"Cannot find user {user_id}")
group = db.groups.get_one(group_id)
if not group:
raise UnexpectedNone(f"Cannot find group {group_id}")
self.user = user
self.group = group
self.name = "migration"
self.report_entries = []
self.logger = root_logger.get_logger()
self.helpers = DatabaseMigrationHelpers(self.db, self.session, self.group_id, self.user_id)
self.helpers = DatabaseMigrationHelpers(self.db, self.session, self.group.id, self.user.id)
self.recipe_service = RecipeService(db, user, group)
super().__init__()
@ -60,7 +72,7 @@ class BaseMigrator(BaseService):
name=report_name,
category=ReportCategory.migration,
status=ReportSummaryStatus.in_progress,
group_id=self.group_id,
group_id=self.group.id,
)
self.report = self.db.group_reports.create(report_to_save)
@ -117,25 +129,23 @@ class BaseMigrator(BaseService):
return_vars: list[tuple[str, UUID4, bool]] = []
group = self.db.groups.get_one(self.group_id)
if not group or not group.preferences:
if not self.group.preferences:
raise ValueError("Group preferences not found")
default_settings = RecipeSettings(
public=group.preferences.recipe_public,
show_nutrition=group.preferences.recipe_show_nutrition,
show_assets=group.preferences.recipe_show_assets,
landscape_view=group.preferences.recipe_landscape_view,
disable_comments=group.preferences.recipe_disable_comments,
disable_amount=group.preferences.recipe_disable_amount,
public=self.group.preferences.recipe_public,
show_nutrition=self.group.preferences.recipe_show_nutrition,
show_assets=self.group.preferences.recipe_show_assets,
landscape_view=self.group.preferences.recipe_landscape_view,
disable_comments=self.group.preferences.recipe_disable_comments,
disable_amount=self.group.preferences.recipe_disable_amount,
)
for recipe in validated_recipes:
recipe.settings = default_settings
recipe.user_id = self.user_id
recipe.group_id = self.group_id
recipe.user_id = self.user.id
recipe.group_id = self.group.id
if recipe.tags:
recipe.tags = self.helpers.get_or_set_tags(x.name for x in recipe.tags)
@ -151,7 +161,7 @@ class BaseMigrator(BaseService):
exception: str | Exception = ""
status = False
try:
recipe = self.db.recipes.create(recipe)
recipe = self.recipe_service.create_one(recipe)
status = True
except Exception as inst:

View File

@ -0,0 +1,123 @@
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from bs4 import BeautifulSoup
from mealie.schema.reports.reports import ReportEntryCreate
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import import_image
def parse_recipe_tags(tags: list) -> list[str]:
    """Return only the usable tag names from ``tags``, preserving their order.

    An entry is kept when it is a non-empty string that does not contain the
    substring ``"Tags:"``; every other entry (empty values, non-strings,
    strings containing ``"Tags:"``) is dropped.
    """
    return [
        candidate
        for candidate in tags
        if candidate and isinstance(candidate, str) and "Tags:" not in candidate
    ]
class CopyMeThatMigrator(BaseMigrator):
    """Migrator for a Copy Me That export.

    The uploaded archive is extracted to a temporary directory; every ``*.html``
    file found at the top level of the extraction is parsed, and each
    ``<div class="recipe">`` inside it is converted into one recipe.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.name = "copymethat"

        # NOTE(review): presumably each alias is the element id found in the export
        # and ``key`` is the recipe field it is renamed to -- confirm in MigrationAlias.
        self.key_aliases = [
            # the export value is ignored; the current time is recorded instead
            MigrationAlias(key="last_made", alias="made_this", func=lambda x: datetime.now()),
            MigrationAlias(key="notes", alias="recipeNotes"),
            MigrationAlias(key="orgURL", alias="original_link"),
            MigrationAlias(key="rating", alias="ratingValue"),
            MigrationAlias(key="recipeIngredient", alias="recipeIngredients"),
            MigrationAlias(key="recipeYield", alias="servings", func=lambda x: x.replace(":", ": ")),
        ]

    def _process_recipe_document(self, source_dir: Path, soup: BeautifulSoup) -> dict:
        """Reads a single recipe's HTML and converts it to a dictionary.

        Args:
            source_dir: directory the archive was extracted into; image paths are
                resolved relative to it.
            soup: the parsed HTML element holding one recipe.

        Returns:
            A dict keyed by element id (text, or a list of strings for ``ul``/``ol``
            elements), plus ``"image"`` (absolute path string) and ``"tags"`` (list
            of strings) when those were found.
        """

        recipe_dict: dict = {}
        recipe_tags: list[str] = []
        extract_root = source_dir.resolve()

        for tag in soup.find_all():
            # the recipe image tag has no id, so we parse it directly
            if tag.name == "img" and "recipeImage" in tag.get("class", []):
                if image_path := tag.get("src"):
                    # The src attribute comes from the uploaded file. Resolve it and
                    # refuse anything that escapes the extracted archive (absolute
                    # paths, "..", symlinks) so arbitrary server files can't be imported.
                    image_file = (extract_root / image_path).resolve()
                    if image_file.is_relative_to(extract_root):
                        recipe_dict["image"] = str(image_file)

                continue

            # tags (internally named categories) are not in a list, and don't have ids
            if tag.name == "span" and "recipeCategory" in tag.get("class", []):
                recipe_tag = tag.get_text(strip=True)
                # skip the "Tags:" label itself and empty spans (an empty tag name is unusable)
                if recipe_tag and "Tags:" not in recipe_tag:
                    recipe_tags.append(recipe_tag)

                continue

            # add only elements with an id to the recipe dictionary
            if not (tag_id := tag.get("id")):
                continue

            # for lists, store the list items as an array (e.g. for recipe instructions)
            if tag.name in ["ul", "ol"]:
                recipe_dict[tag_id] = [item.get_text(strip=True) for item in tag.find_all("li", recursive=False)]
                continue

            # for all other tags, write the text directly to the recipe data
            recipe_dict[tag_id] = tag.get_text(strip=True)

        if recipe_tags:
            recipe_dict["tags"] = recipe_tags

        return recipe_dict

    def _migrate(self) -> None:
        """Extract the archive, parse every recipe, store them, then import their images.

        A recipe that fails to parse is recorded as a failed report entry and the
        remaining recipes are still processed.
        """
        # everything happens inside the context manager: the images are read from
        # the temporary directory after the recipes have been created
        with tempfile.TemporaryDirectory() as tmpdir:
            with zipfile.ZipFile(self.archive) as zip_file:
                zip_file.extractall(tmpdir)

            source_dir = Path(tmpdir)

            recipes_as_dicts: list[dict] = []
            for recipes_data_file in source_dir.glob("*.html"):
                with open(recipes_data_file, encoding="utf-8") as f:
                    soup = BeautifulSoup(f, "lxml")
                    for recipe_data in soup.find_all("div", class_="recipe"):
                        try:
                            recipes_as_dicts.append(self._process_recipe_document(source_dir, recipe_data))

                        # since recipes are stored in one large file, we keep going on error
                        except Exception as e:
                            self.report_entries.append(
                                ReportEntryCreate(
                                    report_id=self.report_id,
                                    success=False,
                                    message="Failed to parse recipe",
                                    exception=f"{type(e).__name__}: {e}",
                                )
                            )

            recipes = [self.clean_recipe_dictionary(x) for x in recipes_as_dicts]
            results = self.import_recipes_to_database(recipes)

            recipe_lookup = {r.slug: r for r in recipes}
            for slug, recipe_id, status in results:
                if not status:
                    continue

                # dict.get returns None for an unknown slug; nothing here can raise
                r = recipe_lookup.get(slug)
                if not r or not r.image:
                    continue

                import_image(r.image, recipe_id)

View File

@ -81,10 +81,17 @@ def glob_walker(directory: Path, glob_str: str, return_parent=True) -> list[Path
return matches
def import_image(src: Path, recipe_id: UUID4):
def import_image(src: str | Path, recipe_id: UUID4):
    """Store the file at ``src`` as the image of the recipe ``recipe_id``.

    ``src`` may be given as a string or as a ``Path``. When nothing exists at
    that location the function returns without doing anything; otherwise the
    path and its suffix are passed to ``RecipeDataService.write_image``.

    Per the original note, minification is not done here; it is expected to
    happen in bulk after the migration.
    """
    image_path = Path(src) if isinstance(src, str) else src

    if image_path.exists():
        RecipeDataService(recipe_id=recipe_id).write_image(image_path, image_path.suffix)

View File

@ -49,7 +49,9 @@ def clean(recipe_data: dict, url=None) -> dict:
recipe_data["recipeInstructions"] = clean_instructions(recipe_data.get("recipeInstructions", []))
recipe_data["image"] = clean_image(recipe_data.get("image"))[0]
recipe_data["slug"] = slugify(recipe_data.get("name", ""))
recipe_data["orgURL"] = url
recipe_data["orgURL"] = url or recipe_data.get("orgURL")
recipe_data["notes"] = clean_notes(recipe_data.get("notes"))
recipe_data["rating"] = clean_int(recipe_data.get("rating"))
return recipe_data
@ -255,6 +257,48 @@ def clean_ingredients(ingredients: list | str | None, default: list | None = Non
raise TypeError(f"Unexpected type for ingredients: {type(ingredients)}, {ingredients}")
def clean_int(val: str | int | None, min: int | None = None, max: int | None = None):
if val is None or isinstance(val, int):
return val
filtered_val = "".join(c for c in val if c.isnumeric())
if not filtered_val:
return None
val = int(filtered_val)
if min is None or max is None:
return val
if not (min <= val <= max):
return None
return val
def clean_notes(notes: typing.Any) -> list[dict] | None:
if not isinstance(notes, list):
return None
parsed_notes: list[dict] = []
for note in notes:
if not isinstance(note, (str, dict)):
continue
if isinstance(note, dict):
if "text" not in note:
continue
if "title" not in note:
note["title"] = ""
parsed_notes.append(note)
continue
parsed_notes.append({"title": "", "text": note})
return parsed_notes
def clean_yield(yld: str | list[str] | None) -> str:
"""
yield_amount attemps to parse out the yield amount from a recipe.

2
poetry.lock generated
View File

@ -3050,4 +3050,4 @@ pgsql = ["psycopg2-binary"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "c88c209ebd76e8f697a1d1af54496b900d4e16a087ca97be7cb288da039526ce"
content-hash = "2e8a98d35c22f3afceefbf22d2b2e23d3471eb5af88e3a907ec848370e92dd14"

View File

@ -41,6 +41,7 @@ recipe-scrapers = "^14.26.0"
requests = "^2.25.1"
tzdata = "^2022.7"
uvicorn = {extras = ["standard"], version = "^0.20.0"}
beautifulsoup4 = "^4.11.2"
[tool.poetry.group.dev.dependencies]
black = "^23.1.0"

View File

@ -8,6 +8,8 @@ migrations_paprika = CWD / "migrations/paprika.zip"
migrations_chowdown = CWD / "migrations/chowdown.zip"
migrations_copymethat = CWD / "migrations/copymethat.zip"
migrations_mealie = CWD / "migrations/mealie.zip"
migrations_nextcloud = CWD / "migrations/nextcloud.zip"

Binary file not shown.

View File

@ -7,6 +7,7 @@ from fastapi.testclient import TestClient
from mealie.schema.group.group_migration import SupportedMigrations
from tests import data as test_data
from tests.utils import api_routes
from tests.utils.assertion_helpers import assert_derserialize
from tests.utils.fixture_schemas import TestUser
@ -20,6 +21,7 @@ test_cases = [
MigrationTestData(typ=SupportedMigrations.nextcloud, archive=test_data.migrations_nextcloud),
MigrationTestData(typ=SupportedMigrations.paprika, archive=test_data.migrations_paprika),
MigrationTestData(typ=SupportedMigrations.chowdown, archive=test_data.migrations_chowdown),
MigrationTestData(typ=SupportedMigrations.copymethat, archive=test_data.migrations_copymethat),
MigrationTestData(typ=SupportedMigrations.mealie_alpha, archive=test_data.migrations_mealie),
]
@ -27,6 +29,7 @@ test_ids = [
"nextcloud_archive",
"paprika_archive",
"chowdown_archive",
"copymethat_archive",
"mealie_alpha_archive",
]
@ -56,3 +59,15 @@ def test_recipe_migration(api_client: TestClient, unique_user: TestUser, mig: Mi
for item in response.json()["entries"]:
assert item["success"]
# Validate Create Event
params = {"orderBy": "created_at", "orderDirection": "desc"}
response = api_client.get(api_routes.recipes, params=params, headers=unique_user.token)
query_data = assert_derserialize(response)
assert len(query_data["items"])
slug = query_data["items"][0]["slug"]
response = api_client.get(api_routes.recipes_slug_timeline_events(slug), headers=unique_user.token)
query_data = assert_derserialize(response)
events = query_data["items"]
assert len(events)