Mirror of https://github.com/mealie-recipes/mealie.git (synced 2025-06-23 07:21:41 -04:00)
Feature/Capture Scraper Improvement PRs (#749)

* capture #732 changes
* capture PR #733
* capture PR #736
* capture PR #745

Co-authored-by: Hayden <hay-kot@pm.me>

parent 58349bc439
commit 89da1a2654
@@ -4,7 +4,6 @@ from fastapi import Depends, File
 from fastapi.datastructures import UploadFile
 from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse
-from scrape_schema_recipe import scrape_url
 from sqlalchemy.orm.session import Session
 from starlette.responses import FileResponse

@@ -16,7 +15,7 @@ from mealie.routes.routers import UserAPIRouter
 from mealie.schema.recipe import CreateRecipeByURL, Recipe, RecipeImageTypes
 from mealie.schema.recipe.recipe import CreateRecipe, RecipeSummary
 from mealie.services.recipe.recipe_service import RecipeService
-from mealie.services.scraper.scraper import create_from_url
+from mealie.services.scraper.scraper import create_from_url, scrape_from_url

 user_router = UserAPIRouter()
 logger = get_logger()
@@ -44,8 +43,11 @@ def parse_recipe_url(url: CreateRecipeByURL, recipe_service: RecipeService = Dep

 @user_router.post("/test-scrape-url")
 def test_parse_recipe_url(url: CreateRecipeByURL):
-    # TODO: Replace with more current implementation of testing schema
-    return scrape_url(url.url)
+    # Debugger should produce the same result as the scraper sees before cleaning
+    scraped_data = scrape_from_url(url.url)
+    if scraped_data:
+        return scraped_data.schema.data
+    return "recipe_scrapers was unable to scrape this URL"


 @user_router.post("/create-from-zip", status_code=201)
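For reviewers who want to poke at the new debug route by hand, a sketch like the following should work. The base URL, port, token, and the /api/recipes prefix are assumptions for illustration, not anything defined in this diff.

```python
# Hedged sketch only: exercising the new test-scrape-url debug endpoint.
# BASE_URL, the token, and the /api/recipes prefix are placeholders/assumptions.
import requests

BASE_URL = "http://localhost:9000"  # hypothetical local Mealie instance

resp = requests.post(
    f"{BASE_URL}/api/recipes/test-scrape-url",
    json={"url": "https://example.com/some-recipe"},
    headers={"Authorization": "Bearer <token>"},  # placeholder credentials
)
# Prints either the raw schema data the scraper saw, or the "unable to scrape" message
print(resp.json())
```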
@@ -44,10 +44,24 @@ def write_image(recipe_slug: str, file_data: bytes, extension: str) -> Path:
 def scrape_image(image_url: str, slug: str) -> Path:
     logger.info(f"Image URL: {image_url}")
     if isinstance(image_url, str):  # Handles String Types
-        image_url = image_url
+        pass

     if isinstance(image_url, list):  # Handles List Types
-        image_url = image_url[0]
+        # Multiple images have been defined in the schema - usually different resolutions
+        # Typically would be in smallest->biggest order, but can't be certain so test each.
+        # 'Google will pick the best image to display in Search results based on the aspect ratio and resolution.'
+
+        all_image_requests = []
+        for url in image_url:
+            try:
+                r = requests.get(url, stream=True, headers={"User-Agent": ""})
+            except Exception:
+                logger.exception(f"Image {url} could not be requested")
+                continue
+            if r.status_code == 200:
+                all_image_requests.append((url, r))
+
+        image_url, _ = max(all_image_requests, key=lambda url_r: len(url_r[1].content), default=("", 0))

     if isinstance(image_url, dict):  # Handles Dictionary Types
         for key in image_url:
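The selection rule above keeps whichever candidate image downloads the most bytes. A standalone illustration, with stand-in objects instead of real requests.Response instances:

```python
# Standalone illustration of the "largest payload wins" rule used above.
# SimpleNamespace stands in for requests.Response; only .content is needed here.
from types import SimpleNamespace

candidates = [
    ("https://example.com/small.jpg", SimpleNamespace(content=b"x" * 10_000)),
    ("https://example.com/large.jpg", SimpleNamespace(content=b"x" * 250_000)),
]

best_url, _ = max(candidates, key=lambda url_r: len(url_r[1].content), default=("", 0))
print(best_url)  # -> https://example.com/large.jpg
```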
@@ -70,6 +84,6 @@ def scrape_image(image_url: str, slug: str) -> Path:

         filename.unlink(missing_ok=True)

-        return slug
+        return Path(slug)

     return None
@@ -2,7 +2,7 @@ import html
 import json
 import re
 from datetime import datetime, timedelta
-from typing import List
+from typing import List, Optional

 from slugify import slugify

@@ -43,9 +43,13 @@ def clean_string(text: str) -> str:
     if isinstance(text, list):
         text = text[0]

+    print(type(text))
+
     if text == "" or text is None:
         return ""

+    print(text)
+
     cleaned_text = html.unescape(text)
     cleaned_text = re.sub("<[^<]+?>", "", cleaned_text)
     cleaned_text = re.sub(" +", " ", cleaned_text)
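clean_string still runs the same three passes (unescape HTML entities, strip tags, collapse runs of spaces); a quick standalone check:

```python
# Quick standalone check of the cleaning passes shown in this hunk.
import html
import re

text = "Preheat &amp; <b>mix</b>   well"
cleaned = html.unescape(text)               # "Preheat & <b>mix</b>   well"
cleaned = re.sub("<[^<]+?>", "", cleaned)   # "Preheat & mix   well"
cleaned = re.sub(" +", " ", cleaned)        # "Preheat & mix well"
print(cleaned)
```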
@@ -67,6 +71,40 @@ def clean_html(raw_html):
     return re.sub(cleanr, "", raw_html)


+def clean_nutrition(nutrition: Optional[dict]) -> dict[str, str]:
+    # Assumes that all units are supplied in grams, except sodium which may be in mg.
+
+    # Fn only expects a dict[str,str]. Other structures should not be parsed.
+    if not isinstance(nutrition, dict):
+        return {}
+
+    # Allow for commas as decimals (common in Europe)
+    # Compile once for efficiency
+    re_match_digits = re.compile(r"\d+([.,]\d+)?")
+
+    output_nutrition = {}
+    for key, val in nutrition.items():
+        # If the val contains digits matching the regex, add the first match to the output dict.
+        # Handle unexpected datastructures safely.
+        try:
+            if matched_digits := re_match_digits.search(val):
+                output_nutrition[key] = matched_digits.group(0)
+        except Exception:
+            continue
+
+    output_nutrition = {key: val.replace(",", ".") for key, val in output_nutrition.items()}
+
+    if "sodiumContent" in nutrition and "m" not in nutrition["sodiumContent"] and "g" in nutrition["sodiumContent"]:
+        # Sodium is in grams. Parse its value, multiply by 1k and return to string.
+        try:
+            output_nutrition["sodiumContent"] = str(float(output_nutrition["sodiumContent"]) * 1000)
+        except ValueError:
+            # Could not parse sodium content as float, so don't touch it.
+            pass
+
+    return output_nutrition
+
+
 def image(image=None) -> str:
     if not image:
         return "no image"
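Expected behaviour of the new helper, mirroring the parametrised tests further down (digits extracted, comma decimals normalised, sodium given in grams converted to milligrams):

```python
# Hedged usage sketch of clean_nutrition; the import path is taken from the
# `from mealie.services.scraper import cleaner` line elsewhere in this diff.
from mealie.services.scraper import cleaner

print(cleaner.clean_nutrition({"calories": "105 kcal"}))      # {'calories': '105'}
print(cleaner.clean_nutrition({"fatContent": "7,5 g"}))       # {'fatContent': '7.5'}
print(cleaner.clean_nutrition({"sodiumContent": "5.1235g"}))  # {'sodiumContent': '5123.5'}
print(cleaner.clean_nutrition(None))                          # {}
```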
@@ -167,9 +205,11 @@ def clean_time(time_entry):
     elif isinstance(time_entry, datetime):
         print(time_entry)
     elif isinstance(time_entry, str):
-        if re.match("PT.*H.*M", time_entry):
+        try:
             time_delta_object = parse_duration(time_entry)
             return pretty_print_timedelta(time_delta_object)
+        except ValueError:
+            logger.error(f"Could not parse time_entry `{time_entry}`")
     else:
         return str(time_entry)

@@ -184,48 +224,34 @@ def parse_duration(iso_duration):
     Returns:
         a datetime.timedelta instance
     """
-    m = re.match(r"^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:.\d+)?)S)?$", iso_duration)
+    m = re.match(
+        r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
+        r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$",
+        iso_duration,
+    )
     if m is None:
         raise ValueError("invalid ISO 8601 duration string")

-    days = 0
-    hours = 0
-    minutes = 0
-    seconds = 0.0
-
     # Years and months are not being utilized here, as there is not enough
     # information provided to determine which year and which month.
     # Python's time_delta class stores durations as days, seconds and
     # microseconds internally, and therefore we'd have to
     # convert parsed years and months to specific number of days.

-    if m[3]:
-        days = int(m[3])
-    if m[4]:
-        hours = int(m[4])
-    if m[5]:
-        minutes = int(m[5])
-    if m[6]:
-        seconds = float(m[6])
+    times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
+    for unit, _ in times.items():
+        if m.group(unit):
+            times[unit] = int(float(m.group(unit)))

-    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
+    return timedelta(**times)


-def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
+def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
     """
     Print a pretty string for a timedelta.
-    For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
+    For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
     number of decimal points can also be set.
     """
-    time_scales = [
-        timedelta(days=365),
-        timedelta(days=1),
-        timedelta(hours=1),
-        timedelta(minutes=1),
-        timedelta(seconds=1),
-        timedelta(microseconds=1000),
-        timedelta(microseconds=1),
-    ]
     time_scale_names_dict = {
         timedelta(days=365): "year",
         timedelta(days=1): "day",
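A self-contained check of the reworked duration parsing above (pattern and extraction loop copied from the hunk, runnable without the mealie package):

```python
# Self-contained sketch of the named-group duration parsing introduced above.
import re
from datetime import timedelta

DURATION_RE = (
    r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
    r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$"
)

m = re.match(DURATION_RE, "P1DT1H1M1.53S")
times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
for unit in times:
    if m.group(unit):
        times[unit] = int(float(m.group(unit)))

print(timedelta(**times))  # -> 1 day, 1:01:01
```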
@@ -236,9 +262,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
         timedelta(microseconds=1): "microsecond",
     }
     count = 0
-    txt = ""
-    first = True
-    for scale in time_scales:
+    out_list = []
+    for scale, scale_name in time_scale_names_dict.items():
         if t >= scale:
             count += 1
             n = t / scale if count == max_components else int(t / scale)
@@ -247,15 +272,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
             n_txt = str(round(n, max_decimal_places))
             if n_txt[-2:] == ".0":
                 n_txt = n_txt[:-2]
-            txt += "{}{} {}{}".format(
-                "" if first else " ",
-                n_txt,
-                time_scale_names_dict[scale],
-                "s" if n > 1 else "",
-            )
-            if first:
-                first = False
+            out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")

-    if len(txt) == 0:
-        txt = "none"
-    return txt
+    if out_list == []:
+        return "none"
+    return " ".join(out_list)
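Putting the reworked loop together end to end. Note the hedges: the Hour/Minute/Second labels and the scale-subtraction step sit outside the hunks shown here, so they are inferred from the parametrised test strings further down rather than quoted from the source.

```python
# Hedged, standalone re-sketch of pretty_print_timedelta as reworked above.
# The Hour/Minute/Second labels and the `t -= n * scale` line are assumptions
# inferred from the expected test strings below, not lines shown in this diff.
from datetime import timedelta

time_scale_names_dict = {
    timedelta(days=365): "year",
    timedelta(days=1): "day",
    timedelta(hours=1): "Hour",
    timedelta(minutes=1): "Minute",
    timedelta(seconds=1): "Second",
}

def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
    count = 0
    out_list = []
    for scale, scale_name in time_scale_names_dict.items():
        if t >= scale:
            count += 1
            n = t / scale if count == max_components else int(t / scale)
            t -= n * scale  # assumed step, not visible in the hunks above
            n_txt = str(round(n, max_decimal_places))
            if n_txt[-2:] == ".0":
                n_txt = n_txt[:-2]
            out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
    if out_list == []:
        return "none"
    return " ".join(out_list)

print(pretty_print_timedelta(timedelta(days=2, seconds=17280)))     # 2 days 4 Hours 48 Minutes
print(pretty_print_timedelta(timedelta(days=2, seconds=17280), 1))  # 2.2 days
```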
@@ -1,6 +1,5 @@
-import json
 from enum import Enum
-from typing import Any, Callable
+from typing import Any, Callable, Optional
 from uuid import uuid4

 import requests
@@ -8,17 +7,11 @@ from fastapi import HTTPException, status
 from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, WebsiteNotImplementedError, scrape_me
 from slugify import slugify

-from mealie.core.config import get_app_dirs
-
-app_dirs = get_app_dirs()
 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe import Recipe, RecipeStep
 from mealie.services.image.image import scrape_image
 from mealie.services.scraper import cleaner, open_graph

-LAST_JSON = app_dirs.DEBUG_DIR.joinpath("last_recipe.json")
-
-
 logger = get_logger()


@@ -32,7 +25,14 @@ def create_from_url(url: str) -> Recipe:
     Returns:
         Recipe: Recipe Object
     """
-    new_recipe = scrape_from_url(url)
+    # Try the different scrapers in order.
+    if scraped_data := scrape_from_url(url):
+        new_recipe = clean_scraper(scraped_data, url)
+    elif og_dict := extract_open_graph_values(url):
+        new_recipe = Recipe(**og_dict)
+    else:
+        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
+
     logger.info(f"Image {new_recipe.image}")
     new_recipe.image = download_image_for_recipe(new_recipe.slug, new_recipe.image)

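The same if/elif/else shape with assignment expressions reads as a generic fallback chain. A toy version with stand-in scraper functions (the names here are illustrative, not Mealie's API):

```python
# Toy illustration of the walrus-operator fallback chain used in create_from_url.
# primary/fallback are stand-ins, not functions from this codebase.
from typing import Optional

def primary(url: str) -> Optional[dict]:
    return None  # pretend recipe_scrapers found nothing usable

def fallback(url: str) -> Optional[dict]:
    return {"name": "Stub recipe"}  # pretend the open graph tags had a name

url = "https://example.com/recipe"
if data := primary(url):
    source = "recipe_scrapers"
elif data := fallback(url):
    source = "open graph fallback"
else:
    raise ValueError("no scraper could handle this URL")

print(source, data)  # -> open graph fallback {'name': 'Stub recipe'}
```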
@@ -49,16 +49,17 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"


-def extract_open_graph_values(url) -> Recipe:
+def extract_open_graph_values(url) -> Optional[dict]:
     r = requests.get(url)
     recipe = open_graph.basic_recipe_from_opengraph(r.text, url)
-
-    return Recipe(**recipe)
+    if recipe.get("name", "") == "":
+        return None
+    return recipe


-def scrape_from_url(url: str) -> Recipe:
-    """Entry function to generating are recipe obejct from a url
-    This will determine if a url can be parsed and raise an appropriate error keyword
+def scrape_from_url(url: str):
+    """Entry function to scrape a recipe from a url
+    This will determine if a url can be parsed and return None if not, to allow another parser to try.
     This keyword is used on the frontend to reference a localized string to present on the UI.

     Args:
@@ -68,7 +69,7 @@ def scrape_from_url(url: str) -> Recipe:
         HTTPException: 400_BAD_REQUEST - See ParserErrors Class for Key Details

     Returns:
-        Recipe: Recipe Model
+        Optional[Scraped schema for cleaning]
     """
     try:
         scraped_schema = scrape_me(url)
@@ -76,28 +77,26 @@
     except WebsiteNotImplementedError:
         try:
             scraped_schema = scrape_me(url, wild_mode=True)
         except (NoSchemaFoundInWildMode, AttributeError):
-            recipe = extract_open_graph_values(url)
-            if recipe.name != "":
-                return recipe
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
+            # Recipe_scraper was unable to extract a recipe.
+            return None

     except ConnectionError:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.CONNECTION_ERROR.value})

+    # Check to see if the recipe is valid
     try:
+        ingredients = scraped_schema.ingredients()
         instruct = scraped_schema.instructions()
     except Exception:
+        ingredients = []
         instruct = []

-    try:
-        ing = scraped_schema.ingredients()
-    except Exception:
-        ing = []
+    if instruct and ingredients:
+        return scraped_schema

-    if not instruct and not ing:
-        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.NO_RECIPE_DATA.value})
-    else:
-        return clean_scraper(scraped_schema, url)
+    # recipe_scrapers did not get a valid recipe.
+    # Return None to let another scraper try.
+    return None


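The validity check above only hands the schema on when both accessors return something truthy; everything else falls through to None so the open graph parser gets a turn. A stand-in schema object makes the behaviour easy to see:

```python
# Stand-in schema object illustrating the new validity check in scrape_from_url.
class FakeSchema:
    def ingredients(self):
        return ["1 cup flour"]

    def instructions(self):
        return ""  # scraper found no instructions

scraped_schema = FakeSchema()
try:
    ingredients = scraped_schema.ingredients()
    instruct = scraped_schema.instructions()
except Exception:
    ingredients = []
    instruct = []

if instruct and ingredients:
    print("valid recipe - hand the schema to clean_scraper")
else:
    print("incomplete recipe - return None so another parser can try")
```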
@@ -135,17 +134,22 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
         except TypeError:
             return []

+    cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
+        None, "cookTime", None, cleaner.clean_time
+    )
+
     return Recipe(
         name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
         slug="",
-        image=try_get_default(scraped_data.image, "image", None),
+        image=try_get_default(None, "image", None),
         description=try_get_default(None, "description", "", cleaner.clean_string),
+        nutrition=try_get_default(None, "nutrition", None, cleaner.clean_nutrition),
         recipe_yield=try_get_default(scraped_data.yields, "recipeYield", "1", cleaner.clean_string),
         recipe_ingredient=try_get_default(scraped_data.ingredients, "recipeIngredient", [""], cleaner.ingredient),
         recipe_instructions=get_instructions(),
         total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
         prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
-        perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
+        perform_time=cook_time,
         org_url=url,
     )

@@ -160,10 +164,3 @@ def download_image_for_recipe(slug, image_url) -> dict:
     img_name = None

     return img_name or "no image"
-
-
-def dump_last_json(recipe_data: dict):
-    with open(LAST_JSON, "w") as f:
-        f.write(json.dumps(recipe_data, indent=4, default=str))
-
-    return
poetry.lock (generated, 765 lines changed)
File diff suppressed because it is too large
@@ -23,7 +23,6 @@ python-slugify = "^4.0.1"
 requests = "^2.25.1"
 PyYAML = "^5.3.1"
 extruct = "^0.12.0"
-scrape-schema-recipe = "^0.1.3"
 python-multipart = "^0.0.5"
 fastapi-camelcase = "^1.0.2"
 bcrypt = "^3.2.0"
@@ -50,6 +49,7 @@ coverage = "^5.5"
 pydantic-to-typescript = "^1.0.7"
 rich = "^10.7.0"
 isort = "^5.9.3"
+regex = "2021.9.30"  # TODO: Remove during Upgrade -> https://github.com/psf/black/issues/2524

 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -1,5 +1,6 @@
 import json
 import re
+from datetime import timedelta

 import pytest

@@ -59,6 +60,24 @@ def test_clean_image():
     assert cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"


+@pytest.mark.parametrize(
+    "nutrition,expected",
+    [
+        (None, {}),
+        ({"calories": "105 kcal"}, {"calories": "105"}),
+        ({"calories": "105 kcal 104 sugar"}, {"calories": "105"}),
+        ({"calories": ""}, {}),
+        ({"calories": ["not just a string"], "sugarContent": "but still tries 555.321"}, {"sugarContent": "555.321"}),
+        ({"sodiumContent": "5.1235g"}, {"sodiumContent": "5123.5"}),
+        ({"sodiumContent": "5mg"}, {"sodiumContent": "5"}),
+        ({"sodiumContent": "10oz"}, {"sodiumContent": "10"}),
+        ({"sodiumContent": "10.1.2g"}, {"sodiumContent": "10100.0"}),
+    ],
+)
+def test_clean_nutrition(nutrition, expected):
+    assert cleaner.clean_nutrition(nutrition) == expected
+
+
 @pytest.mark.parametrize(
     "instructions",
     [
@@ -90,9 +109,29 @@ def test_html_with_recipe_data():
     assert url_validation_regex.match(recipe_data["image"])


-def test_time_cleaner():
-
-    my_time_delta = "PT2H30M"
-    return_delta = cleaner.clean_time(my_time_delta)
-
-    assert return_delta == "2 Hours 30 Minutes"
+@pytest.mark.parametrize(
+    "time_delta,expected",
+    [
+        ("PT2H30M", "2 Hours 30 Minutes"),
+        ("PT30M", "30 Minutes"),
+        ("PT3H", "3 Hours"),
+        ("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"),
+        ("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"),
+        ("PT-3H", None),
+        ("PT", "none"),
+    ],
+)
+def test_time_cleaner(time_delta, expected):
+    assert cleaner.clean_time(time_delta) == expected
+
+
+@pytest.mark.parametrize(
+    "t,max_components,max_decimal_places,expected",
+    [
+        (timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
+        (timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
+        (timedelta(days=365), None, 2, "1 year"),
+    ],
+)
+def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
+    assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected