feat: Create Recipe From HTML or JSON (#4274)

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
Michael Genson 2024-09-30 10:52:13 -05:00 committed by GitHub
parent edf420491f
commit 4c1d855690
23 changed files with 408 additions and 115 deletions

View File

@ -15,6 +15,10 @@ We have renamed the `updateAt` field to `updatedAt`. While the API will still ac
### Backend Endpoint Changes
These endpoints have moved, but are otherwise unchanged (a minimal client sketch follows the list):
- `/recipes/create-url` -> `/recipes/create/url`
- `/recipes/create-url/bulk` -> `/recipes/create/url/bulk`
- `/recipes/create-from-zip` -> `/recipes/create/zip`
- `/recipes/create-from-image` -> `/recipes/create/image`
- `/groups/webhooks` -> `/households/webhooks`
- `/groups/shopping/items` -> `/households/shopping/items`
- `/groups/shopping/lists` -> `/households/shopping/lists`
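
For example, a client that previously posted to `/api/recipes/create-url` only needs the path updated; a minimal sketch in Python (the base URL and token are placeholders):

```python
import requests

MEALIE_URL = "http://localhost:9000"   # placeholder
TOKEN = "your-api-token"               # placeholder

# Before: POST /api/recipes/create-url
# After:  POST /api/recipes/create/url
response = requests.post(
    f"{MEALIE_URL}/api/recipes/create/url",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"url": "https://example.com/my-recipe"},
)
print(response.json())  # slug of the newly created recipe
```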

View File

@ -23,7 +23,7 @@ function import_from_file () {
do
echo $line
curl -X 'POST' \
"$3/api/recipes/create-url" \
"$3/api/recipes/create/url" \
-H "Authorization: Bearer $2" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
@ -81,7 +81,7 @@ def import_from_file(input_file, token, mealie_url):
data = {
'url': line
}
response = requests.post(mealie_url + "/api/recipes/create-url", headers=headers, json=data)
response = requests.post(mealie_url + "/api/recipes/create/url", headers=headers, json=data)
print(response.text)
input_file="list"

View File

@ -18,7 +18,7 @@ Use your best judgement when deciding what to do.
By default, the API is **not** rate limited. This leaves Mealie open to a potential **Denial of Service Attack**. While it's possible to perform a **Denial of Service Attack** on any endpoint, there are a few key endpoints that are more vulnerable than others.
- `/api/recipes/create-url`
- `/api/recipes/create/url`
- `/api/recipes/{id}/image`
These endpoints are used to scrape data based on a user-provided URL. It is possible for a malicious user to issue multiple requests to download an arbitrarily large external file (e.g., a Debian ISO) and sufficiently saturate a CPU assigned to the container. While we do implement some protections against this by chunking the response and using a timeout strategy, it's still possible to overload the CPU if an attacker issues multiple requests concurrently.
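
The chunking and timeout protections mentioned above amount to a bounded streaming download. A minimal illustrative sketch of the idea (not Mealie's actual implementation):

```python
import requests

MAX_BYTES = 5 * 1024 * 1024  # illustrative 5 MB cap

def bounded_fetch(url: str) -> bytes:
    """Download a URL in chunks, aborting on a size cap or timeout."""
    body = b""
    # stream=True avoids buffering the whole response; timeout bounds connect/read.
    with requests.get(url, stream=True, timeout=10) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=64 * 1024):
            body += chunk
            if len(body) > MAX_BYTES:
                raise ValueError("response exceeded size limit")
    return body
```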
@ -33,7 +33,7 @@ If you'd like to mitigate this risk, we suggest that you rate limit the API in g
## Server Side Request Forgery
- `/api/recipes/create-url`
- `/api/recipes/create/url`
- `/api/recipes/{id}/image`
Given the nature of these APIs, it's possible to perform a **Server Side Request Forgery** attack, where a malicious user issues a request to an internal network resource and potentially exfiltrates data. We _do_ perform some checks to mitigate access to resources within your network, but at the end of the day, users of Mealie are allowed to trigger HTTP requests on **your server**.
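
A common mitigation for this class of attack, sketched here for illustration (not Mealie's exact check), is to resolve the target host and refuse private, loopback, or link-local addresses before fetching:

```python
import ipaddress
import socket
from urllib.parse import urlparse

def is_safe_url(url: str) -> bool:
    """Reject URLs whose host resolves to a private, loopback, or link-local address."""
    host = urlparse(url).hostname
    if not host:
        return False
    try:
        resolved = socket.getaddrinfo(host, None)
    except socket.gaierror:
        return False
    for _family, _type, _proto, _canonname, sockaddr in resolved:
        ip = ipaddress.ip_address(sockaddr[0])
        if ip.is_private or ip.is_loopback or ip.is_link_local:
            return False
    return True
```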

View File

@ -1,7 +1,7 @@
<template>
<VJsoneditor
:value="value"
height="1500px"
:height="height"
:options="options"
:attrs="$attrs"
@input="$emit('input', $event)"
@ -20,6 +20,10 @@ export default defineComponent({
type: Object,
default: () => ({}),
},
height: {
type: String,
default: "1500px",
},
options: {
type: Object,
default: () => ({}),

View File

@ -426,6 +426,7 @@
"paste-in-your-recipe-data-each-line-will-be-treated-as-an-item-in-a-list": "Paste in your recipe data. Each line will be treated as an item in a list",
"recipe-markup-specification": "Recipe Markup Specification",
"recipe-url": "Recipe URL",
"recipe-html-or-json": "Recipe HTML or JSON",
"upload-a-recipe": "Upload a Recipe",
"upload-individual-zip-file": "Upload an individual .zip file exported from another Mealie instance.",
"url-form-hint": "Copy and paste a link from your favorite recipe website",
@ -604,10 +605,16 @@
"scrape-recipe-description": "Scrape a recipe by url. Provide the url for the site you want to scrape, and Mealie will attempt to scrape the recipe from that site and add it to your collection.",
"scrape-recipe-have-a-lot-of-recipes": "Have a lot of recipes you want to scrape at once?",
"scrape-recipe-suggest-bulk-importer": "Try out the bulk importer",
"scrape-recipe-have-raw-html-or-json-data": "Have raw HTML or JSON data?",
"scrape-recipe-you-can-import-from-raw-data-directly": "You can import from raw data directly",
"import-original-keywords-as-tags": "Import original keywords as tags",
"stay-in-edit-mode": "Stay in Edit mode",
"import-from-zip": "Import from Zip",
"import-from-zip-description": "Import a single recipe that was exported from another Mealie instance.",
"import-from-html-or-json": "Import from HTML or JSON",
"import-from-html-or-json-description": "Import a single recipe from raw HTML or JSON. This is useful if you have a recipe from a site that Mealie can't scrape normally, or from some other external source.",
"json-import-format-description-colon": "To import via JSON, it must be in valid format:",
"json-editor": "JSON Editor",
"zip-files-must-have-been-exported-from-mealie": ".zip files must have been exported from Mealie",
"create-a-recipe-by-uploading-a-scan": "Create a recipe by uploading a scan.",
"upload-a-png-image-from-a-recipe-book": "Upload a png image from a recipe book",

View File

@ -472,8 +472,15 @@ export interface SaveIngredientUnit {
groupId: string;
}
export interface ScrapeRecipe {
url: string;
includeTags?: boolean;
url: string;
}
export interface ScrapeRecipeBase {
includeTags?: boolean;
}
export interface ScrapeRecipeData {
includeTags?: boolean;
data: string;
}
export interface ScrapeRecipeTest {
url: string;

View File

@ -32,10 +32,11 @@ const routes = {
recipesCreate: `${prefix}/recipes/create`,
recipesBase: `${prefix}/recipes`,
recipesTestScrapeUrl: `${prefix}/recipes/test-scrape-url`,
recipesCreateUrl: `${prefix}/recipes/create-url`,
recipesCreateUrlBulk: `${prefix}/recipes/create-url/bulk`,
recipesCreateFromZip: `${prefix}/recipes/create-from-zip`,
recipesCreateFromImage: `${prefix}/recipes/create-from-image`,
recipesCreateUrl: `${prefix}/recipes/create/url`,
recipesCreateUrlBulk: `${prefix}/recipes/create/url/bulk`,
recipesCreateFromZip: `${prefix}/recipes/create/zip`,
recipesCreateFromImage: `${prefix}/recipes/create/image`,
recipesCreateFromHtmlOrJson: `${prefix}/recipes/create/html-or-json`,
recipesCategory: `${prefix}/recipes/category`,
recipesParseIngredient: `${prefix}/parser/ingredient`,
recipesParseIngredients: `${prefix}/parser/ingredients`,
@ -134,6 +135,10 @@ export class RecipeAPI extends BaseCRUDAPI<CreateRecipe, Recipe, Recipe> {
return await this.requests.post<Recipe | null>(routes.recipesTestScrapeUrl, { url, useOpenAI });
}
async createOneByHtmlOrJson(data: string, includeTags: boolean) {
return await this.requests.post<string>(routes.recipesCreateFromHtmlOrJson, { data, includeTags });
}
async createOneByUrl(url: string, includeTags: boolean) {
return await this.requests.post<string>(routes.recipesCreateUrl, { url, includeTags });
}

View File

@ -150,7 +150,8 @@ import {
mdiRotateRight,
mdiBookOpenPageVariant,
mdiFileCabinet,
mdiSilverwareForkKnife
mdiSilverwareForkKnife,
mdiCodeTags,
} from "@mdi/js";
export const icons = {
@ -192,6 +193,7 @@ export const icons = {
clockOutline: mdiClockTimeFourOutline,
codeBraces: mdiCodeJson,
codeJson: mdiCodeJson,
codeTags: mdiCodeTags,
cog: mdiCog,
commentTextMultiple: mdiCommentTextMultiple,
commentTextMultipleOutline: mdiCommentTextMultipleOutline,

View File

@ -52,6 +52,11 @@ export default defineComponent({
text: i18n.tc("recipe.bulk-url-import"),
value: "bulk",
},
{
icon: $globals.icons.codeTags,
text: i18n.tc("recipe.import-from-html-or-json"),
value: "html",
},
{
icon: $globals.icons.fileImage,
text: i18n.tc("recipe.create-from-image"),

View File

@ -0,0 +1,171 @@
<template>
<v-form ref="domUrlForm" @submit.prevent="createFromHtmlOrJson(newRecipeData, importKeywordsAsTags, stayInEditMode)">
<div>
<v-card-title class="headline"> {{ $tc('recipe.import-from-html-or-json') }} </v-card-title>
<v-card-text>
<p>
{{ $tc("recipe.import-from-html-or-json-description") }}
</p>
<p>
{{ $tc("recipe.json-import-format-description-colon") }}
<a href="https://schema.org/Recipe" target="_blank">https://schema.org/Recipe</a>
</p>
<v-switch
v-model="isEditJSON"
:label="$tc('recipe.json-editor')"
class="mt-2"
@change="handleIsEditJson"
/>
<LazyRecipeJsonEditor
v-if="isEditJSON"
v-model="newRecipeData"
height="250px"
class="mt-10"
:options="EDITOR_OPTIONS"
/>
<v-textarea
v-else
v-model="newRecipeData"
:label="$tc('new-recipe.recipe-html-or-json')"
:prepend-inner-icon="$globals.icons.codeTags"
validate-on-blur
autofocus
filled
clearable
class="rounded-lg mt-2"
rounded
:hint="$tc('new-recipe.url-form-hint')"
persistent-hint
/>
<v-checkbox v-model="importKeywordsAsTags" hide-details :label="$tc('recipe.import-original-keywords-as-tags')" />
<v-checkbox v-model="stayInEditMode" hide-details :label="$tc('recipe.stay-in-edit-mode')" />
</v-card-text>
<v-card-actions class="justify-center">
<div style="width: 250px">
<BaseButton
:disabled="!newRecipeData"
large
rounded
block
type="submit"
:loading="loading"
/>
</div>
</v-card-actions>
</div>
</v-form>
</template>
<script lang="ts">
import { computed, defineComponent, reactive, toRefs, ref, useContext, useRoute, useRouter } from "@nuxtjs/composition-api";
import { AxiosResponse } from "axios";
import { useTagStore } from "~/composables/store/use-tag-store";
import { useUserApi } from "~/composables/api";
import { validators } from "~/composables/use-validators";
import { VForm } from "~/types/vuetify";
const EDITOR_OPTIONS = {
mode: "code",
search: false,
mainMenuBar: false,
};
export default defineComponent({
setup() {
const state = reactive({
error: false,
loading: false,
isEditJSON: false,
});
const { $auth } = useContext();
const route = useRoute();
const groupSlug = computed(() => route.value.params.groupSlug || $auth.user?.groupSlug || "");
const domUrlForm = ref<VForm | null>(null);
const api = useUserApi();
const router = useRouter();
const tags = useTagStore();
const importKeywordsAsTags = computed({
get() {
return route.value.query.use_keywords === "1";
},
set(v: boolean) {
router.replace({ query: { ...route.value.query, use_keywords: v ? "1" : "0" } });
},
});
const stayInEditMode = computed({
get() {
return route.value.query.edit === "1";
},
set(v: boolean) {
router.replace({ query: { ...route.value.query, edit: v ? "1" : "0" } });
},
});
function handleResponse(response: AxiosResponse<string> | null, edit = false, refreshTags = false) {
if (response?.status !== 201) {
state.error = true;
state.loading = false;
return;
}
if (refreshTags) {
tags.actions.refresh();
}
router.push(`/g/${groupSlug.value}/r/${response.data}?edit=${edit.toString()}`);
}
const newRecipeData = ref<string | object | null>(null);
function handleIsEditJson() {
if (state.isEditJSON) {
if (newRecipeData.value) {
try {
newRecipeData.value = JSON.parse(newRecipeData.value as string);
} catch {
newRecipeData.value = { "data": newRecipeData.value };
}
} else {
newRecipeData.value = {};
}
} else if (newRecipeData.value && Object.keys(newRecipeData.value).length > 0) {
newRecipeData.value = JSON.stringify(newRecipeData.value);
} else {
newRecipeData.value = null;
}
}
handleIsEditJson();
async function createFromHtmlOrJson(htmlOrJsonData: string | object | null, importKeywordsAsTags: boolean, stayInEditMode: boolean) {
if (!htmlOrJsonData || !domUrlForm.value?.validate()) {
return;
}
let dataString;
if (typeof htmlOrJsonData === "string") {
dataString = htmlOrJsonData;
} else {
dataString = JSON.stringify(htmlOrJsonData);
}
state.loading = true;
const { response } = await api.recipes.createOneByHtmlOrJson(dataString, importKeywordsAsTags);
handleResponse(response, stayInEditMode, importKeywordsAsTags);
}
return {
EDITOR_OPTIONS,
domUrlForm,
importKeywordsAsTags,
stayInEditMode,
newRecipeData,
handleIsEditJson,
createFromHtmlOrJson,
...toRefs(state),
validators,
};
},
});
</script>

View File

@ -5,7 +5,13 @@
<v-card-title class="headline"> {{ $t('recipe.scrape-recipe') }} </v-card-title>
<v-card-text>
<p>{{ $t('recipe.scrape-recipe-description') }}</p>
<p>{{ $t('recipe.scrape-recipe-have-a-lot-of-recipes') }} <a :href="bulkImporterTarget">{{ $t('recipe.scrape-recipe-suggest-bulk-importer') }}</a>.</p>
<p>
{{ $t('recipe.scrape-recipe-have-a-lot-of-recipes') }}
<a :href="bulkImporterTarget">{{ $t('recipe.scrape-recipe-suggest-bulk-importer') }}</a>.
<br />
{{ $t('recipe.scrape-recipe-have-raw-html-or-json-data') }}
<a :href="htmlOrJsonImporterTarget">{{ $t('recipe.scrape-recipe-you-can-import-from-raw-data-directly') }}</a>.
</p>
<v-text-field
v-model="recipeUrl"
:label="$t('new-recipe.recipe-url')"
@ -96,6 +102,7 @@ export default defineComponent({
const tags = useTagStore();
const bulkImporterTarget = computed(() => `/g/${groupSlug.value}/r/create/bulk`);
const htmlOrJsonImporterTarget = computed(() => `/g/${groupSlug.value}/r/create/html`);
function handleResponse(response: AxiosResponse<string> | null, edit = false, refreshTags = false) {
if (response?.status !== 201) {
@ -171,6 +178,7 @@ export default defineComponent({
return {
bulkImporterTarget,
htmlOrJsonImporterTarget,
recipeUrl,
importKeywordsAsTags,
stayInEditMode,

View File

@ -67,7 +67,7 @@ export default defineComponent({
const formData = new FormData();
formData.append(newRecipeZipFileName, newRecipeZip.value);
const { response } = await api.upload.file("/api/recipes/create-from-zip", formData);
const { response } = await api.upload.file("/api/recipes/create/zip", formData);
handleResponse(response);
}

View File

@ -40,7 +40,7 @@ from mealie.routes._base.mixins import HttpRepo
from mealie.routes._base.routers import MealieCrudRoute, UserAPIRouter
from mealie.schema.cookbook.cookbook import ReadCookBook
from mealie.schema.make_dependable import make_dependable
from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe, ScrapeRecipeData
from mealie.schema.recipe.recipe import (
CreateRecipe,
CreateRecipeByUrlBulk,
@ -73,7 +73,7 @@ from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.recipe.template_service import TemplateService
from mealie.services.scraper.recipe_bulk_scraper import RecipeBulkScraperService
from mealie.services.scraper.scraped_extras import ScraperContext
from mealie.services.scraper.scraper import create_from_url
from mealie.services.scraper.scraper import create_from_html
from mealie.services.scraper.scraper_strategies import (
ForceTimeoutException,
RecipeScraperOpenAI,
@ -201,11 +201,31 @@ class RecipeController(BaseRecipeController):
# =======================================================================
# URL Scraping Operations
@router.post("/create-url", status_code=201, response_model=str)
@router.post("/create/html-or-json", status_code=201)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
"""Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
return await self._create_recipe_from_web(req)
@router.post("/create/url", status_code=201, response_model=str)
async def parse_recipe_url(self, req: ScrapeRecipe):
"""Takes in a URL and attempts to scrape data and load it into the database"""
return await self._create_recipe_from_web(req)
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
if isinstance(req, ScrapeRecipeData):
html = req.data
url = ""
else:
html = None
url = req.url
try:
recipe, extras = await create_from_url(req.url, self.translator)
recipe, extras = await create_from_html(url, self.translator, html)
except ForceTimeoutException as e:
raise HTTPException(
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
@ -233,7 +253,7 @@ class RecipeController(BaseRecipeController):
return new_recipe.slug
@router.post("/create-url/bulk", status_code=202)
@router.post("/create/url/bulk", status_code=202)
def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: BackgroundTasks):
"""Takes in a URL and attempts to scrape data and load it into the database"""
bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group, self.translator)
@ -266,7 +286,7 @@ class RecipeController(BaseRecipeController):
# ==================================================================================================================
# Other Create Operations
@router.post("/create-from-zip", status_code=201)
@router.post("/create/zip", status_code=201)
def create_recipe_from_zip(self, archive: UploadFile = File(...)):
"""Create recipe from archive"""
with get_temporary_zip_path() as temp_path:
@ -280,7 +300,7 @@ class RecipeController(BaseRecipeController):
return recipe.slug
@router.post("/create-from-image", status_code=201)
@router.post("/create/image", status_code=201)
async def create_recipe_from_image(
self,
images: list[UploadFile] = File(...),
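
The new `create/html-or-json` endpoint accepts either raw HTML or a JSON-LD string in the same `data` field; when the payload starts with `{`, it is wrapped in an `ld+json` script tag before parsing. A hedged usage sketch (base URL and token are placeholders):

```python
import json
import requests

MEALIE_URL = "http://localhost:9000"                  # placeholder
HEADERS = {"Authorization": "Bearer your-api-token"}  # placeholder

# Raw HTML is handled by the normal scraper strategies.
html_payload = {"data": "<html>...page containing a recipe...</html>", "includeTags": True}

# A https://schema.org/Recipe object is sent as a JSON string.
recipe_ld = {"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"}
json_payload = {"data": json.dumps(recipe_ld), "includeTags": True}

for payload in (html_payload, json_payload):
    r = requests.post(
        f"{MEALIE_URL}/api/recipes/create/html-or-json",
        headers=HEADERS,
        json=payload,
    )
    print(r.status_code, r.text)  # 201 and the new recipe's slug on success
```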

View File

@ -71,7 +71,7 @@ from .recipe_ingredient import (
)
from .recipe_notes import RecipeNote
from .recipe_nutrition import Nutrition
from .recipe_scraper import ScrapeRecipe, ScrapeRecipeTest
from .recipe_scraper import ScrapeRecipe, ScrapeRecipeBase, ScrapeRecipeData, ScrapeRecipeTest
from .recipe_settings import RecipeSettings
from .recipe_share_token import RecipeShareToken, RecipeShareTokenCreate, RecipeShareTokenSave, RecipeShareTokenSummary
from .recipe_step import IngredientReferences, RecipeStep
@ -157,6 +157,8 @@ __all__ = [
"RecipeTool",
"RecipeToolPagination",
"ScrapeRecipe",
"ScrapeRecipeBase",
"ScrapeRecipeData",
"ScrapeRecipeTest",
"AssignCategories",
"AssignSettings",

View File

@ -8,9 +8,12 @@ class ScrapeRecipeTest(MealieModel):
use_openai: bool = Field(False, alias="useOpenAI")
class ScrapeRecipe(MealieModel):
url: str
class ScrapeRecipeBase(MealieModel):
include_tags: bool = False
class ScrapeRecipe(ScrapeRecipeBase):
url: str
model_config = ConfigDict(
json_schema_extra={
"example": {
@ -19,3 +22,8 @@ class ScrapeRecipe(MealieModel):
},
}
)
class ScrapeRecipeData(ScrapeRecipeBase):
data: str
"""HTML data or JSON string of a https://schema.org/Recipe object"""

View File

@ -15,7 +15,7 @@ from mealie.schema.reports.reports import (
from mealie.schema.user.user import GroupInDB
from mealie.services._base_service import BaseService
from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.scraper.scraper import create_from_url
from mealie.services.scraper.scraper import create_from_html
class RecipeBulkScraperService(BaseService):
@ -85,7 +85,7 @@ class RecipeBulkScraperService(BaseService):
async def _do(url: str) -> Recipe | None:
async with sem:
try:
recipe, _ = await create_from_url(url, self.translator)
recipe, _ = await create_from_html(url, self.translator)
return recipe
except Exception as e:
self.service.logger.error(f"failed to scrape url during bulk url import {url}")

View File

@ -32,12 +32,13 @@ class RecipeScraper:
self.scrapers = scrapers
self.translator = translator
async def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""
Scrapes a recipe from the web.
Skips the network request if `html` is provided.
"""
raw_html = await safe_scrape_html(url)
raw_html = html or await safe_scrape_html(url)
for scraper_type in self.scrapers:
scraper = scraper_type(url, self.translator, raw_html=raw_html)
result = await scraper.parse()

View File

@ -21,24 +21,28 @@ class ParserErrors(str, Enum):
CONNECTION_ERROR = "CONNECTION_ERROR"
async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, ScrapedExtras | None]:
async def create_from_html(
url: str, translator: Translator, html: str | None = None
) -> tuple[Recipe, ScrapedExtras | None]:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful.
a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
Args:
url (str): a valid string representing a URL
html (str | None): optional HTML string to skip network request. Defaults to None.
Returns:
Recipe: Recipe Object
"""
scraper = RecipeScraper(translator)
extracted_url = regex_search(r"(https?://|www\.)[^\s]+", url)
if not html:
extracted_url = regex_search(r"(https?://|www\.)[^\s]+", url)
if not extracted_url:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
url = extracted_url.group(0)
if not extracted_url:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
new_recipe, extras = await scraper.scrape(extracted_url.group(0))
new_recipe, extras = await scraper.scrape(url, html)
if not new_recipe:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
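
With the rename, one entry point serves both flows: pass a URL alone to fetch and scrape it, or pass pre-fetched or user-supplied HTML with an empty URL, as the controller now does. A hedged sketch (the translator import path is assumed):

```python
import asyncio

from mealie.lang import local_provider  # translator factory used in the tests; path assumed
from mealie.services.scraper.scraper import create_from_html

async def main():
    translator = local_provider()

    # From a URL: the HTML is fetched over the network.
    recipe, extras = await create_from_html("https://example.com/my-recipe", translator)

    # From raw HTML: no network request; the URL is left empty.
    raw_html = "<html>...recipe markup...</html>"
    recipe, extras = await create_from_html("", translator, html=raw_html)

asyncio.run(main())
```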

View File

@ -119,6 +119,14 @@ class ABCScraperStrategy(ABC):
class RecipeScraperPackage(ABCScraperStrategy):
@staticmethod
def ld_json_to_html(ld_json: str) -> str:
return (
"<!DOCTYPE html><html><head>"
f'<script type="application/ld+json">{ld_json}</script>'
"</head><body></body></html>"
)
async def get_html(self, url: str) -> str:
return self.raw_html or await safe_scrape_html(url)
@ -192,7 +200,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time, translator=self.translator),
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time, translator=self.translator),
perform_time=cook_time,
org_url=url,
org_url=url or try_get_default(None, "url", None, cleaner.clean_string),
)
return recipe, extras
@ -201,7 +209,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
recipe_html = await self.get_html(self.url)
try:
scraped_schema = scrape_html(recipe_html, org_url=self.url, supported_only=False)
# scrape_html requires a URL, but we might not have one, so we default to a dummy URL
scraped_schema = scrape_html(recipe_html, org_url=self.url or "https://example.com", supported_only=False)
except (NoSchemaFoundInWildMode, AttributeError):
self.logger.error(f"Recipe Scraper was unable to extract a recipe from {self.url}")
return None
@ -300,11 +309,10 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
prompt = service.get_prompt("recipes.scrape-recipe")
response_json = await service.get_response(prompt, text, force_json_response=True)
return (
"<!DOCTYPE html><html><head>"
f'<script type="application/ld+json">{response_json}</script>'
"</head><body></body></html>"
)
if not response_json:
raise Exception("OpenAI did not return any data")
return self.ld_json_to_html(response_json)
except Exception:
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
return ""
@ -340,7 +348,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"recipeIngredient": ["Could not detect ingredients"],
"recipeInstructions": [{"text": "Could not detect instructions"}],
"slug": slugify(og_field(properties, "og:title")),
"orgURL": self.url,
"orgURL": self.url or og_field(properties, "og:url"),
"categories": [],
"tags": og_fields(properties, "og:article:tag"),
"dateAdded": None,

View File

@ -38,7 +38,7 @@ def test_openai_create_recipe_from_image(
monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)
with open(test_image_jpg, "rb") as f:
r = api_client.post(
api_routes.recipes_create_from_image,
api_routes.recipes_create_image,
files={"images": ("test_image_jpg.jpg", f, "image/jpeg")},
data={"extension": "jpg"},
headers=unique_user.token,

View File

@ -31,7 +31,7 @@ from tests import utils
from tests.utils import api_routes
from tests.utils.factories import random_int, random_string
from tests.utils.fixture_schemas import TestUser
from tests.utils.recipe_data import RecipeSiteTestCase, get_recipe_test_cases
from tests.utils.recipe_data import get_recipe_test_cases
recipe_test_data = get_recipe_test_cases()
@ -44,7 +44,7 @@ def tempdir() -> Generator[str, None, None]:
def zip_recipe(tempdir: str, recipe: RecipeSummary) -> dict:
data_file = tempfile.NamedTemporaryFile(mode="w+", dir=tempdir, suffix=".json", delete=False)
json.dump(json.loads(recipe.json()), data_file)
json.dump(json.loads(recipe.model_dump_json()), data_file)
data_file.flush()
zip_file = shutil.make_archive(os.path.join(tempdir, "zipfile"), "zip")
@ -94,36 +94,80 @@ def open_graph_override(html: str):
return get_html
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_create_by_url(
api_client: TestClient,
recipe_data: RecipeSiteTestCase,
unique_user: TestUser,
monkeypatch: MonkeyPatch,
):
# Override init function for AbstractScraper to use the test html instead of calling the url
monkeypatch.setattr(
AbstractScraper,
"__init__",
get_init(recipe_data.html_file),
)
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
for recipe_data in recipe_test_data:
# Override init function for AbstractScraper to use the test html instead of calling the url
monkeypatch.setattr(
scraper_cls,
"get_html",
open_graph_override(recipe_data.html_file.read_text()),
AbstractScraper,
"__init__",
get_init(recipe_data.html_file),
)
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
monkeypatch.setattr(
scraper_cls,
"get_html",
open_graph_override(recipe_data.html_file.read_text()),
)
# Skip AsyncSafeTransport requests
async def return_empty_response(*args, **kwargs):
return Response(200, content=b"")
monkeypatch.setattr(
AsyncSafeTransport,
"handle_async_request",
return_empty_response,
)
# Skip image downloader
monkeypatch.setattr(
RecipeDataService,
"scrape_image",
lambda *_: "TEST_IMAGE",
)
# Skip AsyncSafeTransport requests
async def return_empty_response(*args, **kwargs):
return Response(200, content=b"")
api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
monkeypatch.setattr(
AsyncSafeTransport,
"handle_async_request",
return_empty_response,
)
response = api_client.post(
api_routes.recipes_create_url,
json={"url": recipe_data.url, "include_tags": recipe_data.include_tags},
headers=unique_user.token,
)
assert response.status_code == 201
assert json.loads(response.text) == recipe_data.expected_slug
recipe = api_client.get(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
assert recipe.status_code == 200
recipe_dict: dict = json.loads(recipe.text)
assert recipe_dict["slug"] == recipe_data.expected_slug
assert len(recipe_dict["recipeInstructions"]) == recipe_data.num_steps
assert len(recipe_dict["recipeIngredient"]) == recipe_data.num_ingredients
if not recipe_data.include_tags:
return
expected_tags = recipe_data.expected_tags or set()
assert len(recipe_dict["tags"]) == len(expected_tags)
for tag in recipe_dict["tags"]:
assert tag["name"] in expected_tags
@pytest.mark.parametrize("use_json", [True, False])
def test_create_by_html_or_json(
api_client: TestClient,
unique_user: TestUser,
monkeypatch: MonkeyPatch,
use_json: bool,
):
# Skip image downloader
monkeypatch.setattr(
RecipeDataService,
@ -131,11 +175,21 @@ def test_create_by_url(
lambda *_: "TEST_IMAGE",
)
recipe_data = recipe_test_data[0]
api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
data = recipe_data.html_file.read_text()
if use_json:
soup = BeautifulSoup(data, "lxml")
ld_json_data = soup.find("script", type="application/ld+json")
if ld_json_data:
data = json.dumps(json.loads(ld_json_data.string))
else:
data = "{}"
response = api_client.post(
api_routes.recipes_create_url,
json={"url": recipe_data.url, "include_tags": recipe_data.include_tags},
api_routes.recipes_create_html_or_json,
json={"data": data, "include_tags": recipe_data.include_tags},
headers=unique_user.token,
)
@ -173,9 +227,7 @@ def test_create_recipe_from_zip(api_client: TestClient, unique_user: TestUser, t
slug=recipe_name,
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -193,9 +245,7 @@ def test_create_recipe_from_zip_invalid_group(api_client: TestClient, unique_use
slug=recipe_name,
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -216,9 +266,7 @@ def test_create_recipe_from_zip_invalid_user(api_client: TestClient, unique_user
slug=recipe_name,
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -245,9 +293,7 @@ def test_create_recipe_from_zip_existing_category(api_client: TestClient, unique
recipe_category=[category],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -274,9 +320,7 @@ def test_create_recipe_from_zip_existing_tag(api_client: TestClient, unique_user
tags=[tag],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -306,9 +350,7 @@ def test_create_recipe_from_zip_existing_category_wrong_ids(
recipe_category=[invalid_category],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -336,9 +378,7 @@ def test_create_recipe_from_zip_existing_tag_wrong_ids(api_client: TestClient, u
tags=[invalid_tag],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -363,9 +403,7 @@ def test_create_recipe_from_zip_invalid_category(api_client: TestClient, unique_
recipe_category=[invalid_category],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -393,9 +431,7 @@ def test_create_recipe_from_zip_invalid_tag(api_client: TestClient, unique_user:
tags=[invalid_tag],
)
r = api_client.post(
api_routes.recipes_create_from_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token
)
r = api_client.post(api_routes.recipes_create_zip, files=zip_recipe(tempdir, recipe), headers=unique_user.token)
assert r.status_code == 201
fetched_recipe = database.recipes.get_by_slug(unique_user.group_id, recipe.slug)
@ -408,13 +444,12 @@ def test_create_recipe_from_zip_invalid_tag(api_client: TestClient, unique_user:
assert fetched_recipe.tags[0].slug == invalid_name
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_read_update(
api_client: TestClient,
recipe_data: RecipeSiteTestCase,
unique_user: TestUser,
recipe_categories: list[RecipeCategory],
):
recipe_data = recipe_test_data[0]
recipe_url = api_routes.recipes_slug(recipe_data.expected_slug)
response = api_client.get(recipe_url, headers=unique_user.token)
assert response.status_code == 200
@ -448,8 +483,9 @@ def test_read_update(
assert cats[0]["name"] in test_name
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_duplicate(api_client: TestClient, recipe_data: RecipeSiteTestCase, unique_user: TestUser):
def test_duplicate(api_client: TestClient, unique_user: TestUser):
recipe_data = recipe_test_data[0]
# Initial get of the original recipe
original_recipe_url = api_routes.recipes_slug(recipe_data.expected_slug)
response = api_client.get(original_recipe_url, headers=unique_user.token)
@ -531,12 +567,11 @@ def test_duplicate(api_client: TestClient, recipe_data: RecipeSiteTestCase, uniq
# This needs to happen after test_duplicate,
# otherwise that one will run into problems with comparing the instruction/ingredient lists
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_update_with_empty_relationship(
api_client: TestClient,
recipe_data: RecipeSiteTestCase,
unique_user: TestUser,
):
recipe_data = recipe_test_data[0]
recipe_url = api_routes.recipes_slug(recipe_data.expected_slug)
response = api_client.get(recipe_url, headers=unique_user.token)
assert response.status_code == 200
@ -559,8 +594,8 @@ def test_update_with_empty_relationship(
assert recipe["recipeIngredient"] == []
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_rename(api_client: TestClient, recipe_data: RecipeSiteTestCase, unique_user: TestUser):
def test_rename(api_client: TestClient, unique_user: TestUser):
recipe_data = recipe_test_data[0]
recipe_url = api_routes.recipes_slug(recipe_data.expected_slug)
response = api_client.get(recipe_url, headers=unique_user.token)
assert response.status_code == 200
@ -614,8 +649,8 @@ def test_remove_notes(api_client: TestClient, unique_user: TestUser):
assert len(recipe.get("notes", [])) == 0
@pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_delete(api_client: TestClient, recipe_data: RecipeSiteTestCase, unique_user: TestUser):
def test_delete(api_client: TestClient, unique_user: TestUser):
recipe_data = recipe_test_data[0]
response = api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
assert response.status_code == 200
@ -630,7 +665,7 @@ def test_recipe_crud_404(api_client: TestClient, unique_user: TestUser):
response = api_client.delete(api_routes.recipes_slug("test"), headers=unique_user.token)
assert response.status_code == 404
response = api_client.patch(api_routes.recipes_create_url, json={"test": "stest"}, headers=unique_user.token)
response = api_client.patch(api_routes.recipes_slug("test"), json={"test": "stest"}, headers=unique_user.token)
assert response.status_code == 404

View File

@ -20,7 +20,7 @@ and then use this test case by removing the `@pytest.mark.skip` and then testing
@pytest.mark.asyncio
async def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
translator = local_provider()
recipe, _ = await scraper.create_from_url(recipe_test_data.url, translator)
recipe, _ = await scraper.create_from_html(recipe_test_data.url, translator)
assert recipe.slug == recipe_test_data.expected_slug
assert len(recipe.recipe_instructions or []) == recipe_test_data.num_steps

View File

@ -147,14 +147,16 @@ recipes_bulk_actions_settings = "/api/recipes/bulk-actions/settings"
"""`/api/recipes/bulk-actions/settings`"""
recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag"
"""`/api/recipes/bulk-actions/tag`"""
recipes_create_from_image = "/api/recipes/create-from-image"
"""`/api/recipes/create-from-image`"""
recipes_create_from_zip = "/api/recipes/create-from-zip"
"""`/api/recipes/create-from-zip`"""
recipes_create_url = "/api/recipes/create-url"
"""`/api/recipes/create-url`"""
recipes_create_url_bulk = "/api/recipes/create-url/bulk"
"""`/api/recipes/create-url/bulk`"""
recipes_create_html_or_json = "/api/recipes/create/html-or-json"
"""`/api/recipes/create/html-or-json`"""
recipes_create_image = "/api/recipes/create/image"
"""`/api/recipes/create/image`"""
recipes_create_url = "/api/recipes/create/url"
"""`/api/recipes/create/url`"""
recipes_create_url_bulk = "/api/recipes/create/url/bulk"
"""`/api/recipes/create/url/bulk`"""
recipes_create_zip = "/api/recipes/create/zip"
"""`/api/recipes/create/zip`"""
recipes_exports = "/api/recipes/exports"
"""`/api/recipes/exports`"""
recipes_test_scrape_url = "/api/recipes/test-scrape-url"