fix: Make recipe scraper cleaner more fault tolerant (#3967)

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
This commit is contained in:
Michael Genson 2024-08-01 01:33:46 -05:00 committed by GitHub
parent 05c034fca2
commit 3677d04b56
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -110,11 +110,11 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image
case [str(_), *_]: case [str(_), *_]:
return [x for x in image if x] # Only return non-null strings in list return [x for x in image if x] # Only return non-null strings in list
case [{"url": str(_)}, *_]: case [{"url": str(_)}, *_]:
return [x["url"] for x in image] return [x["url"] for x in image if "url" in x]
case {"url": str(image)}: case {"url": str(image)}:
return [image] return [image]
case [{"@id": str(_)}, *_]: case [{"@id": str(_)}, *_]:
return [x["@id"] for x in image] return [x["@id"] for x in image if "@id" in x]
case _: case _:
logger.exception(f"Unexpected type for image: {type(image)}, {image}") logger.exception(f"Unexpected type for image: {type(image)}, {image}")
return [default] return [default]
@@ -149,7 +149,7 @@ def clean_instructions(steps_object: list | dict | str, default: list | None = N
return [ return [
{"text": _sanitize_instruction_text(instruction["text"])} {"text": _sanitize_instruction_text(instruction["text"])}
for instruction in steps_object for instruction in steps_object
if instruction["text"].strip() if "text" in instruction and instruction["text"].strip()
] ]
case {0: {"text": str()}} | {"0": {"text": str()}} | {1: {"text": str()}} | {"1": {"text": str()}}: case {0: {"text": str()}} | {"0": {"text": str()}} | {1: {"text": str()}} | {"1": {"text": str()}}:
# Some recipes have a dict with a string key representing the index, unsure if these can # Some recipes have a dict with a string key representing the index, unsure if these can