fix: Better Scraper Image Processing (#2821)

* add additional case for scraped image parsing

* made scraper more fault tolerant for missing images

* re-ordered case to favor better implementations

---------

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
This commit is contained in:
Michael Genson 2023-12-11 03:22:06 -06:00 committed by GitHub
parent a6ec488864
commit 5a153b178d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -89,10 +89,10 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image
image attempts to parse the image field from a recipe and return a string. Currently
Supported Structures:
- `https://exmaple.com` - A string
- `{ "url": "https://exmaple.com" }` - A dictionary with a `url` key
- `["https://exmaple.com"]` - A list of strings
- `[{ "url": "https://exmaple.com" }]` - A list of dictionaries with a `url` key
- `https://example.com` - A string
- `{ "url": "https://example.com" }` - A dictionary with a `url` key
- `["https://example.com"]` - A list of strings
- `[{ "url": "https://example.com" }]` - A list of dictionaries with a `url` key
Raises:
TypeError: If the image field is not a supported type a TypeError is raised.
@ -112,8 +112,11 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image
return [x["url"] for x in image]
case {"url": str(image)}:
return [image]
case [{"@id": str(_)}, *_]:
return [x["@id"] for x in image]
case _:
raise TypeError(f"Unexpected type for image: {type(image)}, {image}")
logger.exception(f"Unexpected type for image: {type(image)}, {image}")
return [default]
def clean_instructions(steps_object: list | dict | str, default: list | None = None) -> list[dict]: