Mirror of https://github.com/mealie-recipes/mealie.git, synced 2025-05-24 01:12:54 -04:00
Feature/improve error message on scrape (#476)
* add better feedback on failed scrape
* fix json download link
* add better recipe parser
* dump deps
* fix force open on mobile
* formatting
* rewrite scraper to use new library
* fix failing tests
* bookmarklet support
* bookmarklet instructions
* recipes changelog

Co-authored-by: hay-kot <hay-kot@pm.me>
This commit is contained in:
parent
3702331630
commit
a78fbea711
@@ -26,6 +26,12 @@
## Features and Improvements

### Highlights
- Recipe Parser
- Recipes can now be imported with a bookmarklet!
- Significant improvement in supported sites with the new [Recipe Scraper Library](https://github.com/hhursev/recipe-scrapers)
- UI Debugging now available at `/recipes/debugger`
- Better error messages on failure
- ⚠️ last_recipe.json is now deprecated
- Beta Support for Postgres! 🎉 See the getting started page for details
- Recipe Features
- Step Sections
@@ -3,6 +3,18 @@
## URL Import
Adding a recipe can be as easy as copying the recipe URL into Mealie and letting the web scraper try to pull down the information. Currently this scraper is implemented with the [scrape-schema-recipe package](https://pypi.org/project/scrape-schema-recipe/). You may have mixed results on some websites, especially with blogs or non-recipe-specific websites. See the bulk import option below for another convenient way to add blog-style recipes into Mealie.

## Using Bookmarklets

You can use bookmarklets to generate a bookmark that will take your current location and open a new tab that will try to import that URL into Mealie.

You can use a [bookmarklet generator site](https://caiorss.github.io/bookmarklet-maker/) and the code below to generate a bookmark for your site. Just change `http://localhost:8080` to your site's web address and follow the instructions. Note that there is no trailing `/`.

```js
var url = document.URL;
var mealie = "http://localhost:8080";
var dest = mealie + "/?recipe_import_url=" + url;
window.open(dest, '_blank');
```
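For example, with `mealie` set to `https://mealie.example.com` (an illustrative address), clicking the resulting bookmark while viewing a recipe opens `https://mealie.example.com/?recipe_import_url=<current page URL>` in a new tab, and Mealie starts the import automatically.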

## Recipe Editor
Recipes can be edited and created via the UI. This is done with both a form-based approach, where you have a UI to work with, and an in-browser JSON editor. The JSON editor allows you to easily copy and paste data from other sources.
frontend/package-lock.json (generated, 15465 lines changed)
File diff suppressed because it is too large
@@ -22,7 +22,7 @@
    "vue-i18n": "^8.24.1",
    "vue-router": "^3.5.1",
    "vuedraggable": "^2.24.3",
    "vuetify": "^2.4.6",
    "vuetify": "^2.5.3",
    "vuex": "^3.6.2",
    "vuex-persistedstate": "^4.0.0-beta.3"
  },
@@ -12,6 +12,7 @@ const recipeURLs = {
  allRecipesByCategory: prefix + "category",
  create: prefix + "create",
  createByURL: prefix + "create-url",
  testParseURL: prefix + "test-scrape-url",
  recipe: slug => prefix + slug,
  update: slug => prefix + slug,
  delete: slug => prefix + slug,
@@ -29,11 +30,8 @@ export const recipeAPI = {
   * @returns {string} Recipe Slug
   */
  async createByURL(recipeURL) {
    const response = await apiReq.post(
      recipeURLs.createByURL,
      { url: recipeURL },
      () => i18n.t("recipe.recipe-creation-failed"),
      () => i18n.t("recipe.recipe-created")
    const response = await apiReq.post(recipeURLs.createByURL, { url: recipeURL }, false, () =>
      i18n.t("recipe.recipe-created")
    );

    store.dispatch("requestRecentRecipes");
@@ -186,4 +184,9 @@ export const recipeAPI = {
    const response = await apiReq.delete(API_ROUTES.recipesSlugCommentsId(slug, id));
    return response.data;
  },

  async testScrapeURL(url) {
    const response = await apiReq.post(recipeURLs.testParseURL, { url: url });
    return response.data;
  },
};
@@ -3,9 +3,7 @@
  <v-dialog v-model="addRecipe" width="650" @click:outside="reset">
    <v-card :loading="processing">
      <v-app-bar dark color="primary mb-2">
        <v-icon large left v-if="!processing">
          mdi-link
        </v-icon>
        <v-icon large left v-if="!processing"> mdi-link </v-icon>
        <v-progress-circular v-else indeterminate color="white" large class="mr-2"> </v-progress-circular>

        <v-toolbar-title class="headline">
@@ -28,19 +26,58 @@
          persistent-hint
        ></v-text-field>

        <v-alert v-if="error" color="red" outlined type="success">
          {{ $t("new-recipe.error-message") }}
        </v-alert>
        <v-expand-transition>
          <v-alert v-if="error" color="error" class="mt-6 white--text">
            <v-card-title class="ma-0 pa-0">
              <v-icon left color="white" x-large> mdi-robot </v-icon>
              {{ $t("new-recipe.error-title") }}
            </v-card-title>
            <v-divider class="my-3 mx-2"></v-divider>

            <p>
              {{ $t("new-recipe.error-details") }}
            </p>
            <div class="d-flex row justify-space-around my-3 force-white">
              <a
                class="dark"
                href="https://developers.google.com/search/docs/data-types/recipe"
                target="_blank"
                rel="noreferrer nofollow"
              >
                Google ld+json Info
              </a>
              <a href="https://github.com/hay-kot/mealie/issues" target="_blank" rel="noreferrer nofollow">
                GitHub Issues
              </a>
              <a href="https://schema.org/Recipe" target="_blank" rel="noreferrer nofollow">
                Recipe Markup Specification
              </a>
            </div>
            <div class="d-flex justify-end">
              <v-btn
                white
                outlined
                :to="{ path: '/recipes/debugger', query: { test_url: recipeURL } }"
                @click="addRecipe = false"
              >
                <v-icon> mdi-external-link </v-icon>
                View Scraped Data
              </v-btn>
            </div>
          </v-alert>
        </v-expand-transition>
      </v-card-text>

      <v-divider></v-divider>

      <v-card-actions>
        <v-spacer></v-spacer>
        <v-btn color="grey" text @click="reset">
          <v-icon left> mdi-close </v-icon>
          {{ $t("general.close") }}
        </v-btn>
        <v-btn color="success" text type="submit" :loading="processing">
        <v-spacer></v-spacer>
        <v-btn color="success" type="submit" :loading="processing">
          <v-icon left> {{ $globals.icons.create }} </v-icon>
          {{ $t("general.submit") }}
        </v-btn>
      </v-card-actions>
@@ -65,7 +102,6 @@

<script>
import { api } from "@/api";

export default {
  props: {
    absolute: {
@@ -77,14 +113,32 @@ export default {
      error: false,
      fab: false,
      addRecipe: false,
      recipeURL: "",
      processing: false,
    };
  },

  mounted() {
    if (this.$route.query.recipe_import_url) {
      this.addRecipe = true;
      this.createRecipe();
    }
  },

  computed: {
    recipeURL: {
      set(recipe_import_url) {
        this.$router.replace({ query: { ...this.$route.query, recipe_import_url } });
      },
      get() {
        return this.$route.query.recipe_import_url || "";
      },
    },
  },

  methods: {
    async createRecipe() {
      if (this.$refs.urlForm.validate()) {
        this.error = false;
      if (this.$refs.urlForm === undefined || this.$refs.urlForm.validate()) {
        this.processing = true;
        const response = await api.recipes.createByURL(this.recipeURL);
        this.processing = false;
@@ -106,11 +160,20 @@ export default {
      this.processing = false;
    },
    isValidWebUrl(url) {
      let regEx = /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,256}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)$/gm;
      let regEx =
        /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,256}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)$/gm;
      return regEx.test(url) ? true : "Must be a Valid URL";
    },

    bookmark() {
      return `javascript:(function()%7Bvar url %3D document.URL %3B%0Avar mealie %3D "http%3A%2F%2Flocalhost%3A8080%2F%23"%0Avar dest %3D mealie %2B "%2F%3Frecipe_import_url%3D" %2B url%0Awindow.open(dest%2C '_blank')%7D)()%3B`;
    },
  },
};
</script>

<style></style>
<style>
.force-white > a {
  color: white !important;
}
</style>
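Decoded, the percent-encoded `javascript:` URL returned by `bookmark()` above is the same snippet shown on the URL-import documentation page, pointed at `http://localhost:8080/#`.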
@@ -6,9 +6,7 @@

<v-list-item dense v-if="isLoggedIn" :to="`/user/${user.id}/favorites`">
  <v-list-item-icon>
    <v-icon>
      mdi-heart
    </v-icon>
    <v-icon> mdi-heart </v-icon>
  </v-list-item-icon>
  <v-list-item-content>
    <v-list-item-title> Favorites </v-list-item-title>
@@ -30,17 +28,13 @@
<v-list nav dense class="fixedBottom" v-if="!isMain">
  <v-list-item href="https://github.com/sponsors/hay-kot" target="_target">
    <v-list-item-icon>
      <v-icon color="pink">
        mdi-heart
      </v-icon>
      <v-icon color="pink"> mdi-heart </v-icon>
    </v-list-item-icon>
    <v-list-item-title> {{ $t("about.support") }} </v-list-item-title>
  </v-list-item>
  <v-list-item to="/admin/about">
    <v-list-item-icon class="mr-3 pt-1">
      <v-icon :color="newVersionAvailable ? 'red--text' : ''">
        mdi-information
      </v-icon>
      <v-icon :color="newVersionAvailable ? 'red--text' : ''"> mdi-information </v-icon>
    </v-list-item-icon>
    <v-list-item-content>
      <v-list-item-title>
@@ -86,7 +80,8 @@ export default {
  },
  mounted() {
    this.getVersion();
    this.resetView();

    this.showSidebar = !this.isMobile;
  },
  watch: {
    user() {
@@ -98,7 +93,6 @@ export default {
    isMain() {
      const testVal = this.$route.path.split("/");
      if (testVal[1] === "recipe") this.closeSidebar();
      else this.resetView();

      return !(testVal[1] === "admin");
    },
@@ -135,7 +129,7 @@ export default {
      const pages = this.$store.getters.getCustomPages;
      if (pages.length > 0) {
        pages.sort((a, b) => a.position - b.position);
        return pages.map(x => ({
        return pages.map((x) => ({
          title: x.name,
          to: `/pages/${x.slug}`,
          icon: this.$globals.icons.pages,
@@ -217,9 +211,7 @@ export default {
    resetImage() {
      this.hideImage == false;
    },
    resetView() {
      this.showSidebar = !this.isMobile;
    },

    toggleSidebar() {
      this.showSidebar = !this.showSidebar;
    },
@@ -179,7 +179,8 @@
  },
  "new-recipe": {
    "bulk-add": "Bulk Add",
    "error-message": "Looks like there was an error parsing the URL. Check the log and debug/last_recipe.json to see what went wrong.",
    "error-title": "Looks Like We Couldn't Find Anything",
    "error-details": "Only websites containing ld+json or microdata can be imported by Mealie. Most major recipe websites support this data structure. If your site cannot be imported but there is json data in the log, please submit a GitHub issue with the URL and data.",
    "from-url": "Import a Recipe",
    "paste-in-your-recipe-data-each-line-will-be-treated-as-an-item-in-a-list": "Paste in your recipe data. Each line will be treated as an item in a list",
    "recipe-url": "Recipe URL",
@@ -251,7 +252,6 @@
    "total-time": "Total Time",
    "unable-to-delete-recipe": "Unable to Delete Recipe",
    "view-recipe": "View Recipe"

  },
  "search": {
    "and": "and",
frontend/src/pages/Recipe/ScraperDebugger.vue (new file, 62 lines)
@@ -0,0 +1,62 @@
<template>
  <v-container>
    <v-text-field v-model="testUrl" outlined single-line label="Recipe Url"> </v-text-field>
    <div class="d-flex">
      <v-btn class="mt-0 ml-auto" color="info" @click="getTestData">
        <v-icon left> mdi-test-tube </v-icon>
        Test Scrape
      </v-btn>
    </div>
    <VJsoneditor class="mt-2" v-model="recipeJson" height="1500px" :options="jsonEditorOptions" />
  </v-container>
</template>

<script>
import VJsoneditor from "v-jsoneditor";
import { api } from "@/api";
export default {
  components: {
    VJsoneditor,
  },
  data() {
    return {
      jsonEditorOptions: {
        mode: "code",
        search: false,
        mainMenuBar: false,
      },
      recipeJson: {},
      defaultMessage: { details: "site failed to return valid schema" },
    };
  },
  mounted() {
    if (this.$route.query.test_url) {
      this.getTestData();
    }
  },
  computed: {
    testUrl: {
      set(test_url) {
        this.$router.replace({ query: { ...this.$route.query, test_url } });
      },
      get() {
        return this.$route.query.test_url || "";
      },
    },
  },
  methods: {
    async getTestData() {
      const response = await api.recipes.testScrapeURL(this.testUrl).catch(() => {
        this.recipeJson = this.defaultMessage;
      });

      if (response.length < 1) {
        this.recipeJson = this.defaultMessage;
        return;
      }

      this.recipeJson = response;
    },
  },
};
</script>
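A usage note: this page is registered at `/recipes/debugger` (see the route table below) and reads an optional `test_url` query parameter, which is how the failed-import dialog's "View Scraped Data" button links here with the URL pre-filled.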
@@ -63,7 +63,7 @@ import RecipeViewer from "@/components/Recipe/RecipeViewer";
import PrintView from "@/components/Recipe/PrintView";
import RecipeEditor from "@/components/Recipe/RecipeEditor";
import RecipeTimeCard from "@/components/Recipe/RecipeTimeCard.vue";
import EditorButtonRow from "@/components/Recipe/EditorButtonRow";
import EditorButtonRow from "@/components/Recipe/EditorButtonRow.vue";
import NoRecipe from "@/components/Fallbacks/NoRecipe";
import { user } from "@/mixins/user";
import { router } from "@/routes";
@@ -133,7 +133,7 @@ export default {
  },

  watch: {
    $route: function() {
    $route: function () {
      this.getRecipeDetails();
    },
  },
@@ -1,5 +1,6 @@
const ViewRecipe = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipe/ViewRecipe");
const NewRecipe = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipe/NewRecipe");
const ScraperDebugger = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipe/ScraperDebugger");
const CustomPage = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipes/CustomPage");
const AllRecipes = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipes/AllRecipes");
const CategoryTagPage = () => import(/* webpackChunkName: "recipes" */ "@/pages/Recipes/CategoryTagPage");
@@ -9,6 +10,7 @@ import { api } from "@/api";
export const recipeRoutes = [
  // Recipes
  { path: "/recipes/all", component: AllRecipes },
  { path: "/recipes/debugger", component: ScraperDebugger },
  { path: "/user/:id/favorites", component: Favorites },
  { path: "/recipes/tag/:tag", component: CategoryTagPage },
  { path: "/recipes/tag", component: CategoryTagPage },
@@ -13,6 +13,7 @@ from mealie.services.events import create_recipe_event
from mealie.services.image.image import scrape_image, write_image
from mealie.services.recipe.media import check_assets, delete_assets
from mealie.services.scraper.scraper import create_from_url
from scrape_schema_recipe import scrape_url
from slugify import slugify
from sqlalchemy.orm.session import Session

@@ -41,6 +42,11 @@ def create_from_json(
    return recipe.slug


@router.post("/test-scrape-url", dependencies=[Depends(get_current_user)])
def test_parse_recipe_url(url: RecipeURLIn):
    return scrape_url(url.url)


@router.post("/create-url", status_code=201, response_model=str)
def parse_recipe_url(
    background_tasks: BackgroundTasks,
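The new endpoint simply proxies `scrape_url` so the raw schema data can be inspected. A minimal sketch of calling it from a script, assuming the router is mounted under `/api/recipes` and with `MEALIE_URL` and `TOKEN` as placeholders for a real instance address and API token:

```python
import requests  # third-party HTTP client

MEALIE_URL = "http://localhost:8080"  # placeholder instance address
TOKEN = "<api-token>"  # the route depends on get_current_user, so auth is required

# POST the page URL; the body shape matches the RecipeURLIn schema used above
response = requests.post(
    f"{MEALIE_URL}/api/recipes/test-scrape-url",
    json={"url": "https://www.bonappetit.com/recipe/detroit-style-pepperoni-pizza"},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
print(response.json())  # raw schema data as returned by scrape_url
```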
@@ -1,15 +1,14 @@
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException, status
from mealie.routes.deps import validate_file_token
from starlette.responses import FileResponse
from fastapi import HTTPException, status

router = APIRouter(prefix="/api/utils", tags=["Utils"], include_in_schema=True)


@router.get("/download/{token}")
@router.get("/download")
async def download_file(file_path: Optional[Path] = Depends(validate_file_token)):
    """Uses a file token obtained by an active user to retrieve a file from the operating
    system."""
@@ -42,6 +42,7 @@ def write_image(recipe_slug: str, file_data: bytes, extension: str) -> Path:


def scrape_image(image_url: str, slug: str) -> Path:
    logger.info(f"Image URL: {image_url}")
    if isinstance(image_url, str):  # Handles String Types
        image_url = image_url

@@ -64,7 +65,7 @@ def scrape_image(image_url: str, slug: str) -> Path:

    if r.status_code == 200:
        r.raw.decode_content = True

        logger.info(f"File Name Suffix {filename.suffix}")
        write_image(slug, r.raw, filename.suffix)

        filename.unlink(missing_ok=True)
@@ -39,6 +39,8 @@ def minify_image(image_file: Path, force=False) -> ImageSizes:
    min_dest = image_file.parent.joinpath("min-original.webp")
    tiny_dest = image_file.parent.joinpath("tiny-original.webp")

    cleanup_images = False

    if min_dest.exists() and tiny_dest.exists() and org_dest.exists() and not force:
        return
    try:
@@ -9,7 +9,7 @@ from mealie.db.database import db
from mealie.schema.migration import MigrationImport
from mealie.schema.recipe import Recipe
from mealie.services.image import image
from mealie.services.scraper.cleaner import Cleaner
from mealie.services.scraper import cleaner
from mealie.utils.unzip import unpack_zip
from pydantic import BaseModel

@@ -144,7 +144,7 @@ class MigrationBase(BaseModel):
        """Calls the rewrite_alias function and the Cleaner.clean function on a
        dictionary and returns the result unpacked into a Recipe object"""
        recipe_dict = self.rewrite_alias(recipe_dict)
        recipe_dict = Cleaner.clean(recipe_dict, url=recipe_dict.get("org_url", None))
        recipe_dict = cleaner.clean(recipe_dict, url=recipe_dict.get("org_url", None))

        return Recipe(**recipe_dict)
@@ -1,4 +1,5 @@
import html
import json
import re
from datetime import datetime, timedelta
from typing import List
@@ -6,157 +7,157 @@ from typing import List
from slugify import slugify


class Cleaner:
    """A Namespace for utility functions to clean recipe data extracted
    from a url and returns a dictionary that is ready for import into
    the database. Cleaner.clean is the main entrypoint
def clean(recipe_data: dict, url=None) -> dict:
    """Main entrypoint to clean a recipe extracted from the web
    and format the data into an acceptable format for the database

    Args:
        recipe_data (dict): raw recipe dictionary

    Returns:
        dict: cleaned recipe dictionary
    """
    recipe_data["description"] = clean_string(recipe_data.get("description", ""))

    @staticmethod
    def clean(recipe_data: dict, url=None) -> dict:
        """Main entrypoint to clean a recipe extracted from the web
        and format the data into an acceptable format for the database
    # Times
    recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"))
    recipe_data["performTime"] = clean_time(recipe_data.get("performTime"))
    recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"))
    recipe_data["recipeCategory"] = category(recipe_data.get("recipeCategory", []))

        Args:
            recipe_data (dict): raw recipe dictionary
    recipe_data["recipeYield"] = yield_amount(recipe_data.get("recipeYield"))
    recipe_data["recipeIngredient"] = ingredient(recipe_data.get("recipeIngredient"))
    recipe_data["recipeInstructions"] = instructions(recipe_data.get("recipeInstructions"))
    recipe_data["image"] = image(recipe_data.get("image"))
    recipe_data["slug"] = slugify(recipe_data.get("name"))
    recipe_data["orgURL"] = url

        Returns:
            dict: cleaned recipe dictionary
        """
        recipe_data["description"] = Cleaner.html(recipe_data.get("description", ""))
    return recipe_data

        # Times
        recipe_data["prepTime"] = Cleaner.time(recipe_data.get("prepTime"))
        recipe_data["performTime"] = Cleaner.time(recipe_data.get("performTime"))
        recipe_data["totalTime"] = Cleaner.time(recipe_data.get("totalTime"))
        recipe_data["recipeCategory"] = Cleaner.category(recipe_data.get("recipeCategory", []))

        recipe_data["recipeYield"] = Cleaner.yield_amount(recipe_data.get("recipeYield"))
        recipe_data["recipeIngredient"] = Cleaner.ingredient(recipe_data.get("recipeIngredient"))
        recipe_data["recipeInstructions"] = Cleaner.instructions(recipe_data.get("recipeInstructions"))
        recipe_data["image"] = Cleaner.image(recipe_data.get("image"))
        recipe_data["slug"] = slugify(recipe_data.get("name"))
        recipe_data["orgURL"] = url
def clean_string(text: str) -> str:
    cleaned_text = html.unescape(text)
    cleaned_text = re.sub("<[^<]+?>", "", cleaned_text)
    cleaned_text = re.sub(" +", " ", cleaned_text)
    cleaned_text = re.sub("</p>", "\n", cleaned_text)
    cleaned_text = re.sub(r"\n\s*\n", "\n\n", cleaned_text)
    cleaned_text = cleaned_text.replace("\xa0", " ").replace("\t", " ").strip()
    return cleaned_text

        return recipe_data

    @staticmethod
    def category(category: str):
        if isinstance(category, str) and category != "":
            return [category]
        else:
            return []
def category(category: str):
    if isinstance(category, str) and category != "":
        return [category]
    else:
        return []

    @staticmethod
    def html(raw_html):
        cleanr = re.compile("<.*?>")
        return re.sub(cleanr, "", raw_html)

    @staticmethod
    def image(image=None) -> str:
        if not image:
            return "no image"
        if isinstance(image, list):
            return image[0]
        elif isinstance(image, dict):
            return image["url"]
        elif isinstance(image, str):
            return image
        else:
            raise Exception(f"Unrecognised image URL format: {image}")
def clean_html(raw_html):
    cleanr = re.compile("<.*?>")
    return re.sub(cleanr, "", raw_html)

    @staticmethod
    def instructions(instructions) -> List[dict]:
        if not instructions:
            return []

        if isinstance(instructions[0], list):
            instructions = instructions[0]
def image(image=None) -> str:
    if not image:
        return "no image"
    if isinstance(image, list):
        return image[0]
    elif isinstance(image, dict):
        return image["url"]
    elif isinstance(image, str):
        return image
    else:
        raise Exception(f"Unrecognised image URL format: {image}")

        # One long string split by (possibly multiple) new lines
        if isinstance(instructions, str):
            return [{"text": Cleaner._instruction(line)} for line in instructions.splitlines() if line]

        # Plain strings in a list
        elif isinstance(instructions, list) and isinstance(instructions[0], str):
            return [{"text": Cleaner._instruction(step)} for step in instructions]
def instructions(instructions) -> List[dict]:
    try:
        instructions = json.loads(instructions)
    except Exception:
        pass

        # Dictionaries (let's assume it's a HowToStep) in a list
        elif isinstance(instructions, list) and isinstance(instructions[0], dict):
            # Try List of Dictionary without "@type" or "type"
            if not instructions[0].get("@type", False) and not instructions[0].get("type", False):
                return [{"text": Cleaner._instruction(step["text"])} for step in instructions]
    if not instructions:
        return []

    if isinstance(instructions, list) and isinstance(instructions[0], list):
        instructions = instructions[0]

    # One long string split by (possibly multiple) new lines
    if isinstance(instructions, str):
        return [{"text": _instruction(line)} for line in instructions.splitlines() if line]

    # Plain strings in a list
    elif isinstance(instructions, list) and isinstance(instructions[0], str):
        return [{"text": _instruction(step)} for step in instructions]

    # Dictionaries (let's assume it's a HowToStep) in a list
    elif isinstance(instructions, list) and isinstance(instructions[0], dict):
        # Try List of Dictionary without "@type" or "type"
        if not instructions[0].get("@type", False) and not instructions[0].get("type", False):
            return [{"text": _instruction(step["text"])} for step in instructions]

        try:
            # If HowToStep is under HowToSection
            sectionSteps = []
            for step in instructions:
                if step["@type"] == "HowToSection":
                    [sectionSteps.append(item) for item in step["itemListElement"]]

            if len(sectionSteps) > 0:
                return [{"text": _instruction(step["text"])} for step in sectionSteps if step["@type"] == "HowToStep"]

            return [{"text": _instruction(step["text"])} for step in instructions if step["@type"] == "HowToStep"]
        except Exception as e:
            print(e)
            # Not "@type", try "type"
            try:
            # If HowToStep is under HowToSection
            sectionSteps = []
            for step in instructions:
                if step["@type"] == "HowToSection":
                    [sectionSteps.append(item) for item in step["itemListElement"]]

            if len(sectionSteps) > 0:
                return [
                    {"text": Cleaner._instruction(step["text"])}
                    for step in sectionSteps
                    if step["@type"] == "HowToStep"
                ]

            return [
                {"text": Cleaner._instruction(step["text"])}
                {"text": _instruction(step["properties"]["text"])}
                for step in instructions
                if step["@type"] == "HowToStep"
                if step["type"].find("HowToStep") > -1
            ]
        except Exception as e:
            print(e)
            # Not "@type", try "type"
            try:
                return [
                    {"text": Cleaner._instruction(step["properties"]["text"])}
                    for step in instructions
                    if step["type"].find("HowToStep") > -1
                ]
            except Exception:
                pass
            except Exception:
                pass

    else:
        raise Exception(f"Unrecognised instruction format: {instructions}")
        else:
            raise Exception(f"Unrecognised instruction format: {instructions}")

    @staticmethod
    def _instruction(line) -> str:
        clean_line = Cleaner.html(line.strip())
        # Some sites erroneously escape their strings on multiple levels
        while not clean_line == (clean_line := html.unescape(clean_line)):
            pass
        return clean_line

    @staticmethod
    def ingredient(ingredients: list) -> str:
        if ingredients:
            return [Cleaner.html(html.unescape(ing)) for ing in ingredients]
        else:
            return []
def _instruction(line) -> str:
    clean_line = clean_string(line.strip())
    # Some sites erroneously escape their strings on multiple levels
    while not clean_line == (clean_line := clean_string(clean_line)):
        pass
    return clean_line

    @staticmethod
    def yield_amount(yld) -> str:
        if isinstance(yld, list):
            return yld[-1]
        else:
            return yld

    @staticmethod
    def time(time_entry):
        if time_entry is None:
            return None
        elif isinstance(time_entry, timedelta):
            pretty_print_timedelta(time_entry)
        elif isinstance(time_entry, datetime):
            print(time_entry)
        elif isinstance(time_entry, str):
            if re.match("PT.*H.*M", time_entry):
                time_delta_object = parse_duration(time_entry)
                return pretty_print_timedelta(time_delta_object)
        else:
            return str(time_entry)
def ingredient(ingredients: list) -> str:
    if ingredients:
        return [clean_string(ing) for ing in ingredients]
    else:
        return []


def yield_amount(yld) -> str:
    if isinstance(yld, list):
        return yld[-1]
    else:
        return yld


def clean_time(time_entry):
    if time_entry is None:
        return None
    elif isinstance(time_entry, timedelta):
        pretty_print_timedelta(time_entry)
    elif isinstance(time_entry, datetime):
        print(time_entry)
    elif isinstance(time_entry, str):
        if re.match("PT.*H.*M", time_entry):
            time_delta_object = parse_duration(time_entry)
            return pretty_print_timedelta(time_delta_object)
    else:
        return str(time_entry)


# ! TODO: Cleanup Code Below
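With the `Cleaner` class flattened into module-level functions, call sites now import the module and call plain functions. A small sketch of the new surface, mirroring the updated unit tests later in this commit:

```python
from mealie.services.scraper import cleaner

# clean_string strips tags and HTML entities from scraped text
assert cleaner.clean_string("<div>Hello World</div>") == "Hello World"

# clean_time renders an ISO 8601 duration as a readable string
assert cleaner.clean_time("PT2H30M") == "2 Hours 30 Minutes"

# clean() remains the entrypoint for a whole scraped dictionary
# (the sample recipe dict here is illustrative)
recipe = cleaner.clean({"name": "Pasta Bake", "recipeInstructions": "Boil.\nBake."})
assert recipe["slug"] == "pasta-bake"
```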
@@ -1,18 +1,20 @@
import json
from typing import List
from enum import Enum
from typing import Any, Callable

import requests
import scrape_schema_recipe
from mealie.core import root_logger
from fastapi import HTTPException, status
from mealie.core.config import app_dirs
from mealie.schema.recipe import Recipe
from mealie.core.root_logger import get_logger
from mealie.schema.recipe import Recipe, RecipeStep
from mealie.services.image.image import scrape_image
from mealie.services.scraper import open_graph
from mealie.services.scraper.cleaner import Cleaner
from mealie.services.scraper import cleaner, open_graph
from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, WebsiteNotImplementedError, scrape_me

LAST_JSON = app_dirs.DEBUG_DIR.joinpath("last_recipe.json")

logger = root_logger.get_logger()

logger = get_logger()


def create_from_url(url: str) -> Recipe:
@@ -25,52 +27,130 @@ def create_from_url(url: str) -> Recipe:
    Returns:
        Recipe: Recipe Object
    """
    r = requests.get(url)
    new_recipe = extract_recipe_from_html(r.text, url)
    new_recipe = Cleaner.clean(new_recipe, url)
    new_recipe = download_image_for_recipe(new_recipe)

    return Recipe(**new_recipe)


def extract_recipe_from_html(html: str, url: str) -> dict:
    try:
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url, python_objects=True)
    except Exception as e:
        print(e)
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = Cleaner.clean(new_recipe, url)
    else:
        new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")
    new_recipe = scrape_from_url(url)
    logger.info(f"Image {new_recipe.image}")
    new_recipe.image = download_image_for_recipe(new_recipe.slug, new_recipe.image)

    return new_recipe


def download_image_for_recipe(recipe: dict) -> dict:
    try:
        img_path = scrape_image(recipe.get("image"), recipe.get("slug"))
        recipe["image"] = img_path.name
    except Exception:
        recipe["image"] = "no image"
class ParserErrors(str, Enum):
    bad_recipe = "BAD_RECIPE_DATA"
    no_recipe_data = "NO_RECIPE_DATA"
    connection_error = "CONNECTION_ERROR"

    return recipe

def extract_open_graph_values(url) -> Recipe:
    r = requests.get(url)
    recipe = open_graph.basic_recipe_from_opengraph(r.text, url)

    return Recipe(**recipe)


def scrape_from_url(url: str) -> Recipe:
    """Entry function for generating a recipe object from a url.
    This will determine if a url can be parsed and raise an appropriate error keyword.
    This keyword is used on the frontend to reference a localized string to present on the UI.

    Args:
        url (str): String Representing the URL

    Raises:
        HTTPException: 400_BAD_REQUEST - See ParserErrors Class for Key Details

    Returns:
        Recipe: Recipe Model
    """
    try:
        scraped_schema = scrape_me(url)
    except (WebsiteNotImplementedError, AttributeError):
        try:
            scraped_schema = scrape_me(url, wild_mode=True)
        except (NoSchemaFoundInWildMode, AttributeError):
            recipe = extract_open_graph_values(url)
            if recipe.name != "":
                return recipe
            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.bad_recipe.value})

    except ConnectionError:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.connection_error.value})

    try:
        instruct = scraped_schema.instructions()
    except Exception:
        instruct = []

    try:
        ing = scraped_schema.ingredients()
    except Exception:
        ing = []

    if not instruct and not ing:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.no_recipe_data.value})
    else:
        return clean_scraper(scraped_schema, url)


def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
    def try_get_default(func_call: Callable, get_attr: str, default: Any, clean_func=None):
        value = default
        try:
            value = func_call()
        except Exception:
            logger.error(f"Error parsing recipe func_call for '{get_attr}'")

        if value == default:
            try:
                value = scraped_data.schema.data.get(get_attr)
            except Exception:
                logger.error(f"Error parsing recipe attribute '{get_attr}'")

        if clean_func:
            value = clean_func(value)

        return value

    def get_instructions() -> list[dict]:
        instruction_as_text = try_get_default(
            scraped_data.instructions, "recipeInstructions", ["No Instructions Found"]
        )

        logger.info(f"Scraped Instructions: (Type: {type(instruction_as_text)}) \n {instruction_as_text}")

        instruction_as_text = cleaner.instructions(instruction_as_text)

        logger.info(f"Cleaned Instructions: (Type: {type(instruction_as_text)}) \n {instruction_as_text}")

        try:
            return [RecipeStep(title="", text=x.get("text")) for x in instruction_as_text]
        except TypeError:
            return []

    return Recipe(
        name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
        slug="",
        image=try_get_default(scraped_data.image, "image", None),
        description=try_get_default(None, "description", "", cleaner.clean_string),
        recipe_yield=try_get_default(scraped_data.yields, "recipeYield", "1", cleaner.clean_string),
        recipe_ingredient=try_get_default(scraped_data.ingredients, "recipeIngredient", [""], cleaner.ingredient),
        recipe_instructions=get_instructions(),
        total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
        prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
        perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
        org_url=url,
    )


def download_image_for_recipe(slug, image_url) -> dict:
    img_name = None
    try:
        img_path = scrape_image(image_url, slug)
        img_name = img_path.name
    except Exception as e:
        logger.error(f"Error Scraping Image: {e}")
        img_name = None

    return img_name or "no image"


def dump_last_json(recipe_data: dict):
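Taken together, `scrape_from_url` gives the import path a layered fallback: the site-specific parser from recipe-scrapers, then its wild mode, then bare OpenGraph metadata; anything that still fails surfaces as a 400 whose `details` keyword the frontend maps to a localized error message. A minimal sketch of consuming that contract (the wrapper function is hypothetical):

```python
from fastapi import HTTPException

from mealie.services.scraper import scraper


def try_import(url: str):
    """Hypothetical wrapper: returns a Recipe, or the parser error keyword on failure."""
    try:
        return scraper.create_from_url(url)
    except HTTPException as exc:
        # exc.detail carries {"details": <ParserErrors value>}, e.g. "NO_RECIPE_DATA"
        return exc.detail["details"]
```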
poetry.lock (generated, 19 lines changed)
@@ -939,6 +939,19 @@ python-versions = "*"
[package.dependencies]
rdflib = ">=4.2.2"

[[package]]
name = "recipe-scrapers"
version = "13.2.7"
description = "Python package, scraping recipes from all over the internet"
category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
beautifulsoup4 = ">=4.6.0"
extruct = ">=0.8.0"
requests = ">=2.19.1"

[[package]]
name = "regex"
version = "2021.4.4"
@@ -1236,7 +1249,7 @@ python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "73bac73c62e64c90a29816dde9ef1d896e8ca0b4271e67cde6ca8cc56bd87efd"
content-hash = "8a123b6b0cf37c1d4a66ea4f137f79bba79f373c7019af879e1b06fb5ded0ed4"

[metadata.files]
aiofiles = [
@@ -1893,6 +1906,10 @@ rdflib = [
rdflib-jsonld = [
    {file = "rdflib-jsonld-0.5.0.tar.gz", hash = "sha256:4f7d55326405071c7bce9acf5484643bcb984eadb84a6503053367da207105ed"},
]
recipe-scrapers = [
    {file = "recipe_scrapers-13.2.7-py3-none-any.whl", hash = "sha256:e5b2a251bbba2ef319ce32a10c4073b23f483f0ee2db83da543204549b06dffe"},
    {file = "recipe_scrapers-13.2.7.tar.gz", hash = "sha256:e03d20a5c39f9c3dcb0185be1b6480ac0a086900d6aacf1699c77fa090944901"},
]
regex = [
    {file = "regex-2021.4.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000"},
    {file = "regex-2021.4.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711"},

@@ -33,6 +33,7 @@ lxml = "4.6.2"
Pillow = "^8.2.0"
pathvalidate = "^2.4.1"
apprise = "^0.9.2"
recipe-scrapers = "^13.2.7"


[tool.poetry.dev-dependencies]
|
||||
import re
|
||||
|
||||
import pytest
|
||||
from mealie.services.scraper.cleaner import Cleaner
|
||||
from mealie.services.scraper.scraper import extract_recipe_from_html
|
||||
from mealie.services.scraper import cleaner
|
||||
from mealie.services.scraper.scraper import open_graph
|
||||
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
|
||||
|
||||
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
|
||||
@ -39,23 +39,23 @@ url_validation_regex = re.compile(
|
||||
],
|
||||
)
|
||||
def test_cleaner_clean(json_file, num_steps):
|
||||
recipe_data = Cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
|
||||
recipe_data = cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
|
||||
assert len(recipe_data["recipeInstructions"]) == num_steps
|
||||
|
||||
|
||||
def test_clean_category():
|
||||
assert Cleaner.category("my-category") == ["my-category"]
|
||||
assert cleaner.category("my-category") == ["my-category"]
|
||||
|
||||
|
||||
def test_clean_html():
|
||||
assert Cleaner.html("<div>Hello World</div>") == "Hello World"
|
||||
def test_clean_string():
|
||||
assert cleaner.clean_string("<div>Hello World</div>") == "Hello World"
|
||||
|
||||
|
||||
def test_clean_image():
|
||||
assert Cleaner.image(None) == "no image"
|
||||
assert Cleaner.image("https://my.image/path/") == "https://my.image/path/"
|
||||
assert Cleaner.image({"url": "My URL!"}) == "My URL!"
|
||||
assert Cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"
|
||||
assert cleaner.image(None) == "no image"
|
||||
assert cleaner.image("https://my.image/path/") == "https://my.image/path/"
|
||||
assert cleaner.image({"url": "My URL!"}) == "My URL!"
|
||||
assert cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -70,7 +70,7 @@ def test_clean_image():
|
||||
],
|
||||
)
|
||||
def test_cleaner_instructions(instructions):
|
||||
assert Cleaner.instructions(instructions) == [
|
||||
assert cleaner.instructions(instructions) == [
|
||||
{"text": "A"},
|
||||
{"text": "B"},
|
||||
{"text": "C"},
|
||||
@ -80,20 +80,18 @@ def test_cleaner_instructions(instructions):
|
||||
def test_html_with_recipe_data():
|
||||
path = TEST_RAW_HTML.joinpath("healthy_pasta_bake_60759.html")
|
||||
url = "https://www.bbc.co.uk/food/recipes/healthy_pasta_bake_60759"
|
||||
recipe_data = extract_recipe_from_html(open(path, encoding="utf8").read(), url)
|
||||
recipe_data = open_graph.basic_recipe_from_opengraph(open(path, encoding="utf8").read(), url)
|
||||
|
||||
assert len(recipe_data["name"]) > 10
|
||||
assert len(recipe_data["slug"]) > 10
|
||||
assert recipe_data["orgURL"] == url
|
||||
assert len(recipe_data["description"]) > 100
|
||||
assert url_validation_regex.match(recipe_data["image"])
|
||||
assert len(recipe_data["recipeIngredient"]) == 13
|
||||
assert len(recipe_data["recipeInstructions"]) == 4
|
||||
|
||||
|
||||
def test_time_cleaner():
|
||||
|
||||
my_time_delta = "PT2H30M"
|
||||
return_delta = Cleaner.time(my_time_delta)
|
||||
return_delta = cleaner.clean_time(my_time_delta)
|
||||
|
||||
assert return_delta == "2 Hours 30 Minutes"
|
||||
|
tests/unit_tests/test_recipe_parser.py (new file, 62 lines)
@@ -0,0 +1,62 @@
from dataclasses import dataclass

import pytest
from mealie.services.scraper import scraper


@dataclass
class RecipeSiteTestCase:
    url: str
    expected_slug: str
    num_ingredients: int
    num_steps: int


test_cases = [
    RecipeSiteTestCase(
        url="https://www.seriouseats.com/taiwanese-three-cup-chicken-san-bei-gi-recipe",
        expected_slug="taiwanese-three-cup-chicken-san-bei-ji-recipe",
        num_ingredients=10,
        num_steps=3,
    ),
    RecipeSiteTestCase(
        url="https://www.rezeptwelt.de/backen-herzhaft-rezepte/schinken-kaese-waffeln-ohne-viel-schnickschnack/4j0bkiig-94d4d-106529-cfcd2-is97x2ml",
        expected_slug="schinken-kase-waffeln-ohne-viel-schnickschnack",
        num_ingredients=7,
        num_steps=1,  # Malformed JSON Data, can't parse steps just get one string
    ),
    RecipeSiteTestCase(
        url="https://cookpad.com/us/recipes/5544853-sous-vide-smoked-beef-ribs",
        expected_slug="sous-vide-smoked-beef-ribs",
        num_ingredients=7,
        num_steps=12,
    ),
    RecipeSiteTestCase(
        url="https://www.greatbritishchefs.com/recipes/jam-roly-poly-recipe",
        expected_slug="jam-roly-poly-with-custard",
        num_ingredients=13,
        num_steps=9,
    ),
    RecipeSiteTestCase(
        url="https://recipes.anovaculinary.com/recipe/sous-vide-shrimp",
        expected_slug="sous-vide-shrimp",
        num_ingredients=5,
        num_steps=0,
    ),
    RecipeSiteTestCase(
        url="https://www.bonappetit.com/recipe/detroit-style-pepperoni-pizza",
        expected_slug="detroit-style-pepperoni-pizza",
        num_ingredients=8,
        num_steps=5,
    ),
]


@pytest.mark.parametrize("recipe_test_data", test_cases)
def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
    recipe = scraper.create_from_url(recipe_test_data.url)

    assert recipe.slug == recipe_test_data.expected_slug
    assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
    assert len(recipe.recipe_ingredient) == recipe_test_data.num_ingredients
    assert recipe.org_url == recipe_test_data.url