From 38502e82d4d1eb07d6ac4f2b9fd5d7202954e11e Mon Sep 17 00:00:00 2001
From: jlssmt <42897917+jlssmt@users.noreply.github.com>
Date: Fri, 27 Sep 2024 13:02:34 +0200
Subject: [PATCH] feat: add regex to url before scraping (#4174)

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
---
 mealie/services/scraper/scraper.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py
index a6e307ca8206..75a29d60ec5b 100644
--- a/mealie/services/scraper/scraper.py
+++ b/mealie/services/scraper/scraper.py
@@ -1,4 +1,5 @@
 from enum import Enum
+from re import search as regex_search
 from uuid import uuid4
 
 from fastapi import HTTPException, status
@@ -31,7 +32,13 @@ async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, Scr
     Recipe: Recipe Object
     """
     scraper = RecipeScraper(translator)
-    new_recipe, extras = await scraper.scrape(url)
+
+    extracted_url = regex_search(r"(https?://|www\.)[^\s]+", url)
+
+    if not extracted_url:
+        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
+
+    new_recipe, extras = await scraper.scrape(extracted_url.group(0))
     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
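
For illustration only (not part of the patch): a minimal sketch of how the added regex_search call behaves on a few made-up inputs, useful when reviewing what create_from_url now accepts. The sample strings and URL_PATTERN name below are assumptions for demonstration.

from re import search as regex_search

# Same pattern the patch applies before scraping: take the first run of
# non-whitespace characters that starts with http(s):// or www.
URL_PATTERN = r"(https?://|www\.)[^\s]+"

samples = [
    "https://example.com/recipes/pasta",                      # bare URL passes through unchanged
    "Check this out: https://example.com/recipes/pasta yum",  # URL embedded in surrounding text
    "www.example.com/recipes/pasta",                          # scheme-less www. URL still matches
    "no url in here at all",                                  # no match -> endpoint raises HTTP 400
]

for text in samples:
    match = regex_search(URL_PATTERN, text)
    # create_from_url passes match.group(0) to the scraper, or raises
    # HTTP 400 (BAD_RECIPE_DATA) when nothing matches.
    print(match.group(0) if match else "no match")

Note that the pattern only stops at whitespace, so punctuation glued directly to the URL (for example a trailing closing parenthesis) stays in the extracted match.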