feat: add regex to url before scraping (#4174)

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
This commit is contained in:
jlssmt 2024-09-27 13:02:34 +02:00 committed by GitHub
parent f8cd8b00a5
commit 38502e82d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,4 +1,5 @@
from enum import Enum
from re import search as regex_search
from uuid import uuid4
from fastapi import HTTPException, status
@@ -31,7 +32,13 @@ async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, Scr
Recipe: Recipe Object
"""
scraper = RecipeScraper(translator)
new_recipe, extras = await scraper.scrape(url)
extracted_url = regex_search(r"(https?://|www\.)[^\s]+", url)
if not extracted_url:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
new_recipe, extras = await scraper.scrape(extracted_url.group(0))
if not new_recipe:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})