diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py index 63c94aa322ec..920927c6614c 100644 --- a/mealie/services/scraper/cleaner.py +++ b/mealie/services/scraper/cleaner.py @@ -47,7 +47,7 @@ def clean(recipe_data: dict, url=None) -> dict: recipe_data["recipeYield"] = clean_yield(recipe_data.get("recipeYield")) recipe_data["recipeIngredient"] = clean_ingredients(recipe_data.get("recipeIngredient", [])) recipe_data["recipeInstructions"] = clean_instructions(recipe_data.get("recipeInstructions", [])) - recipe_data["image"] = clean_image(recipe_data.get("image")) + recipe_data["image"] = clean_image(recipe_data.get("image"))[0] recipe_data["slug"] = slugify(recipe_data.get("name", "")) recipe_data["orgURL"] = url @@ -77,31 +77,34 @@ def clean_string(text: str | list | int) -> str: return cleaned_text -def clean_image(image: str | list | dict | None = None, default="no image") -> str: +def clean_image(image: str | list | dict | None = None, default: str = "no image") -> list[str]: """ image attempts to parse the image field from a recipe and return a string. Currenty Supported Structures: - - `["https://exmaple.com"]` - A list of strings - `https://exmaple.com` - A string - - `{ "url": "https://exmaple.com"` - A dictionary with a `url` key + - `{ "url": "https://exmaple.com" }` - A dictionary with a `url` key + - `["https://exmaple.com"]` - A list of strings + - `[{ "url": "https://exmaple.com" }]` - A list of dictionaries with a `url` key Raises: TypeError: If the image field is not a supported type a TypeError is raised. Returns: - str: "no image" if any empty string is provided or the url of the image + list[str]: list of urls, or [default] if input is empty """ if not image: - return default + return [default] - match image: # noqa - match statement not supported + match image: case str(image): + return [image] + case [str(_), *_]: return image - case list(image): - return image[0] + case [{"url": str(_)}, *_]: + return [x["url"] for x in image] case {"url": str(image)}: - return image + return [image] case _: raise TypeError(f"Unexpected type for image: {type(image)}, {image}") diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py index de8ff6cd6757..cd2435192366 100644 --- a/mealie/services/scraper/scraper_strategies.py +++ b/mealie/services/scraper/scraper_strategies.py @@ -150,7 +150,7 @@ class RecipeScraperPackage(ABCScraperStrategy): recipe = Recipe( name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string), slug="", - image=try_get_default(None, "image", None), + image=try_get_default(None, "image", None, cleaner.clean_image), description=try_get_default(None, "description", "", cleaner.clean_string), nutrition=try_get_default(None, "nutrition", None, cleaner.clean_nutrition), recipe_yield=try_get_default(scraped_data.yields, "recipeYield", "1", cleaner.clean_string), diff --git a/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py b/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py index dec06cc9d339..67cbf9b34869 100644 --- a/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py +++ b/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py @@ -73,22 +73,27 @@ image_cleaner_test_cases = ( CleanerCase( test_id="empty_string", input="", - expected="no image", + expected=["no image"], ), CleanerCase( test_id="no_change", input="https://example.com/image.jpg", - expected="https://example.com/image.jpg", + expected=["https://example.com/image.jpg"], ), CleanerCase( test_id="dict with url key", input={"url": "https://example.com/image.jpg"}, - expected="https://example.com/image.jpg", + expected=["https://example.com/image.jpg"], ), CleanerCase( test_id="list of strings", input=["https://example.com/image.jpg"], - expected="https://example.com/image.jpg", + expected=["https://example.com/image.jpg"], + ), + CleanerCase( + test_id="list of dicts with url key", + input=[{"url": "https://example.com/image.jpg"}], + expected=["https://example.com/image.jpg"], ), )