mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-07-07 10:15:03 -04:00
Merge pull request #2810 from michael-genson/fix/translation-issues-when-scraping
fix: Translate ISO 8601 Datetime Durations During Scraping/Parsing/Migrating
This commit is contained in:
commit
3174216931
@ -31,5 +31,14 @@
|
|||||||
"generic-updated-with-url": "{name} has been updated, {url}",
|
"generic-updated-with-url": "{name} has been updated, {url}",
|
||||||
"generic-duplicated": "{name} has been duplicated",
|
"generic-duplicated": "{name} has been duplicated",
|
||||||
"generic-deleted": "{name} has been deleted"
|
"generic-deleted": "{name} has been deleted"
|
||||||
|
},
|
||||||
|
"datetime": {
|
||||||
|
"year": "year|years",
|
||||||
|
"day": "day|days",
|
||||||
|
"hour": "hour|hours",
|
||||||
|
"minute": "minute|minutes",
|
||||||
|
"second": "second|seconds",
|
||||||
|
"millisecond": "millisecond|milliseconds",
|
||||||
|
"microsecond": "microsecond|microseconds"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
@ -13,6 +14,22 @@ class JsonProvider:
|
|||||||
else:
|
else:
|
||||||
self.translations = path
|
self.translations = path
|
||||||
|
|
||||||
|
def _parse_plurals(self, value: str, count: float):
|
||||||
|
# based off of: https://kazupon.github.io/vue-i18n/guide/pluralization.html
|
||||||
|
|
||||||
|
values = [v.strip() for v in value.split("|")]
|
||||||
|
if len(values) == 1:
|
||||||
|
return value
|
||||||
|
elif len(values) == 2:
|
||||||
|
return values[0] if count == 1 else values[1]
|
||||||
|
elif len(values) == 3:
|
||||||
|
if count == 0:
|
||||||
|
return values[0]
|
||||||
|
else:
|
||||||
|
return values[1] if count == 1 else values[2]
|
||||||
|
else:
|
||||||
|
return values[0]
|
||||||
|
|
||||||
def t(self, key: str, default=None, **kwargs) -> str:
|
def t(self, key: str, default=None, **kwargs) -> str:
|
||||||
keys = key.split(".")
|
keys = key.split(".")
|
||||||
|
|
||||||
@ -30,9 +47,12 @@ class JsonProvider:
|
|||||||
|
|
||||||
if i == last:
|
if i == last:
|
||||||
for key, value in kwargs.items():
|
for key, value in kwargs.items():
|
||||||
if not value:
|
translation_value = cast(str, translation_value)
|
||||||
|
if value is None:
|
||||||
value = ""
|
value = ""
|
||||||
translation_value = translation_value.replace("{" + key + "}", value)
|
if key == "count":
|
||||||
return translation_value
|
translation_value = self._parse_plurals(translation_value, float(value))
|
||||||
|
translation_value = translation_value.replace("{" + key + "}", str(value)) # type: ignore
|
||||||
|
return translation_value # type: ignore
|
||||||
|
|
||||||
return default or key
|
return default or key
|
||||||
|
@ -44,6 +44,7 @@ class GroupMigrationController(BaseUserController):
|
|||||||
"user_id": self.user.id,
|
"user_id": self.user.id,
|
||||||
"group_id": self.group_id,
|
"group_id": self.group_id,
|
||||||
"add_migration_tag": add_migration_tag,
|
"add_migration_tag": add_migration_tag,
|
||||||
|
"translator": self.translator,
|
||||||
}
|
}
|
||||||
|
|
||||||
table: dict[SupportedMigrations, type[BaseMigrator]] = {
|
table: dict[SupportedMigrations, type[BaseMigrator]] = {
|
||||||
|
@ -164,7 +164,7 @@ class RecipeController(BaseRecipeController):
|
|||||||
async def parse_recipe_url(self, req: ScrapeRecipe):
|
async def parse_recipe_url(self, req: ScrapeRecipe):
|
||||||
"""Takes in a URL and attempts to scrape data and load it into the database"""
|
"""Takes in a URL and attempts to scrape data and load it into the database"""
|
||||||
try:
|
try:
|
||||||
recipe, extras = await create_from_url(req.url)
|
recipe, extras = await create_from_url(req.url, self.translator)
|
||||||
except ForceTimeoutException as e:
|
except ForceTimeoutException as e:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
|
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
|
||||||
@ -193,7 +193,7 @@ class RecipeController(BaseRecipeController):
|
|||||||
@router.post("/create-url/bulk", status_code=202)
|
@router.post("/create-url/bulk", status_code=202)
|
||||||
def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: BackgroundTasks):
|
def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: BackgroundTasks):
|
||||||
"""Takes in a URL and attempts to scrape data and load it into the database"""
|
"""Takes in a URL and attempts to scrape data and load it into the database"""
|
||||||
bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group)
|
bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group, self.translator)
|
||||||
report_id = bulk_scraper.get_report_id()
|
report_id = bulk_scraper.get_report_id()
|
||||||
bg_tasks.add_task(bulk_scraper.scrape, bulk)
|
bg_tasks.add_task(bulk_scraper.scrape, bulk)
|
||||||
|
|
||||||
@ -208,7 +208,7 @@ class RecipeController(BaseRecipeController):
|
|||||||
async def test_parse_recipe_url(self, url: ScrapeRecipeTest):
|
async def test_parse_recipe_url(self, url: ScrapeRecipeTest):
|
||||||
# Debugger should produce the same result as the scraper sees before cleaning
|
# Debugger should produce the same result as the scraper sees before cleaning
|
||||||
try:
|
try:
|
||||||
if scraped_data := await RecipeScraperPackage(url.url).scrape_url():
|
if scraped_data := await RecipeScraperPackage(url.url, self.translator).scrape_url():
|
||||||
return scraped_data.schema.data
|
return scraped_data.schema.data
|
||||||
except ForceTimeoutException as e:
|
except ForceTimeoutException as e:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
@ -6,6 +6,7 @@ from pydantic import UUID4
|
|||||||
|
|
||||||
from mealie.core import root_logger
|
from mealie.core import root_logger
|
||||||
from mealie.core.exceptions import UnexpectedNone
|
from mealie.core.exceptions import UnexpectedNone
|
||||||
|
from mealie.lang.providers import Translator
|
||||||
from mealie.repos.all_repositories import AllRepositories
|
from mealie.repos.all_repositories import AllRepositories
|
||||||
from mealie.schema.recipe import Recipe
|
from mealie.schema.recipe import Recipe
|
||||||
from mealie.schema.recipe.recipe_settings import RecipeSettings
|
from mealie.schema.recipe.recipe_settings import RecipeSettings
|
||||||
@ -35,12 +36,20 @@ class BaseMigrator(BaseService):
|
|||||||
helpers: DatabaseMigrationHelpers
|
helpers: DatabaseMigrationHelpers
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, archive: Path, db: AllRepositories, session, user_id: UUID4, group_id: UUID, add_migration_tag: bool
|
self,
|
||||||
|
archive: Path,
|
||||||
|
db: AllRepositories,
|
||||||
|
session,
|
||||||
|
user_id: UUID4,
|
||||||
|
group_id: UUID,
|
||||||
|
add_migration_tag: bool,
|
||||||
|
translator: Translator,
|
||||||
):
|
):
|
||||||
self.archive = archive
|
self.archive = archive
|
||||||
self.db = db
|
self.db = db
|
||||||
self.session = session
|
self.session = session
|
||||||
self.add_migration_tag = add_migration_tag
|
self.add_migration_tag = add_migration_tag
|
||||||
|
self.translator = translator
|
||||||
|
|
||||||
user = db.users.get_one(user_id)
|
user = db.users.get_one(user_id)
|
||||||
if not user:
|
if not user:
|
||||||
@ -229,6 +238,6 @@ class BaseMigrator(BaseService):
|
|||||||
with contextlib.suppress(KeyError):
|
with contextlib.suppress(KeyError):
|
||||||
del recipe_dict["id"]
|
del recipe_dict["id"]
|
||||||
|
|
||||||
recipe_dict = cleaner.clean(recipe_dict, url=recipe_dict.get("org_url", None))
|
recipe_dict = cleaner.clean(recipe_dict, self.translator, url=recipe_dict.get("org_url", None))
|
||||||
|
|
||||||
return Recipe(**recipe_dict)
|
return Recipe(**recipe_dict)
|
||||||
|
@ -10,6 +10,7 @@ from datetime import datetime, timedelta
|
|||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
|
||||||
from mealie.core.root_logger import get_logger
|
from mealie.core.root_logger import get_logger
|
||||||
|
from mealie.lang.providers import Translator
|
||||||
|
|
||||||
logger = get_logger("recipe-scraper")
|
logger = get_logger("recipe-scraper")
|
||||||
|
|
||||||
@ -32,7 +33,7 @@ MATCH_ERRONEOUS_WHITE_SPACE = re.compile(r"\n\s*\n")
|
|||||||
""" Matches multiple new lines and removes erroneous white space """
|
""" Matches multiple new lines and removes erroneous white space """
|
||||||
|
|
||||||
|
|
||||||
def clean(recipe_data: dict, url=None) -> dict:
|
def clean(recipe_data: dict, translator: Translator, url=None) -> dict:
|
||||||
"""Main entrypoint to clean a recipe extracted from the web
|
"""Main entrypoint to clean a recipe extracted from the web
|
||||||
and format the data into an accectable format for the database
|
and format the data into an accectable format for the database
|
||||||
|
|
||||||
@ -45,9 +46,9 @@ def clean(recipe_data: dict, url=None) -> dict:
|
|||||||
recipe_data["description"] = clean_string(recipe_data.get("description", ""))
|
recipe_data["description"] = clean_string(recipe_data.get("description", ""))
|
||||||
|
|
||||||
# Times
|
# Times
|
||||||
recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"))
|
recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"), translator)
|
||||||
recipe_data["performTime"] = clean_time(recipe_data.get("performTime"))
|
recipe_data["performTime"] = clean_time(recipe_data.get("performTime"), translator)
|
||||||
recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"))
|
recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"), translator)
|
||||||
recipe_data["recipeCategory"] = clean_categories(recipe_data.get("recipeCategory", []))
|
recipe_data["recipeCategory"] = clean_categories(recipe_data.get("recipeCategory", []))
|
||||||
recipe_data["recipeYield"] = clean_yield(recipe_data.get("recipeYield"))
|
recipe_data["recipeYield"] = clean_yield(recipe_data.get("recipeYield"))
|
||||||
recipe_data["recipeIngredient"] = clean_ingredients(recipe_data.get("recipeIngredient", []))
|
recipe_data["recipeIngredient"] = clean_ingredients(recipe_data.get("recipeIngredient", []))
|
||||||
@ -335,7 +336,7 @@ def clean_yield(yld: str | list[str] | None) -> str:
|
|||||||
return yld
|
return yld
|
||||||
|
|
||||||
|
|
||||||
def clean_time(time_entry: str | timedelta | None) -> None | str:
|
def clean_time(time_entry: str | timedelta | None, translator: Translator) -> None | str:
|
||||||
"""_summary_
|
"""_summary_
|
||||||
|
|
||||||
Supported Structures:
|
Supported Structures:
|
||||||
@ -361,11 +362,11 @@ def clean_time(time_entry: str | timedelta | None) -> None | str:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
time_delta_instructionsect = parse_duration(time_entry)
|
time_delta_instructionsect = parse_duration(time_entry)
|
||||||
return pretty_print_timedelta(time_delta_instructionsect)
|
return pretty_print_timedelta(time_delta_instructionsect, translator)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return str(time_entry)
|
return str(time_entry)
|
||||||
case timedelta():
|
case timedelta():
|
||||||
return pretty_print_timedelta(time_entry)
|
return pretty_print_timedelta(time_entry, translator)
|
||||||
case {"minValue": str(value)}:
|
case {"minValue": str(value)}:
|
||||||
return clean_time(value)
|
return clean_time(value)
|
||||||
case [str(), *_]:
|
case [str(), *_]:
|
||||||
@ -374,7 +375,7 @@ def clean_time(time_entry: str | timedelta | None) -> None | str:
|
|||||||
# TODO: Not sure what to do here
|
# TODO: Not sure what to do here
|
||||||
return str(time_entry)
|
return str(time_entry)
|
||||||
case _:
|
case _:
|
||||||
logger.warning("[SCRAPER] Unexpected type or structure for time_entrys")
|
logger.warning("[SCRAPER] Unexpected type or structure for variable time_entry")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@ -408,25 +409,25 @@ def parse_duration(iso_duration: str) -> timedelta:
|
|||||||
return timedelta(**times)
|
return timedelta(**times)
|
||||||
|
|
||||||
|
|
||||||
def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
|
def pretty_print_timedelta(t: timedelta, translator: Translator, max_components=None, max_decimal_places=2):
|
||||||
"""
|
"""
|
||||||
Print a pretty string for a timedelta.
|
Print a pretty string for a timedelta.
|
||||||
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'.
|
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'.
|
||||||
Setting max_components to e.g. 1 will change this to '2.2 days', where the number of decimal
|
Setting max_components to e.g. 1 will change this to '2.2 days', where the number of decimal
|
||||||
points can also be set.
|
points can also be set.
|
||||||
"""
|
"""
|
||||||
time_scale_names_dict = {
|
time_scale_translation_keys_dict = {
|
||||||
timedelta(days=365): "year",
|
timedelta(days=365): "datetime.year",
|
||||||
timedelta(days=1): "day",
|
timedelta(days=1): "datetime.day",
|
||||||
timedelta(hours=1): "Hour",
|
timedelta(hours=1): "datetime.hour",
|
||||||
timedelta(minutes=1): "Minute",
|
timedelta(minutes=1): "datetime.minute",
|
||||||
timedelta(seconds=1): "Second",
|
timedelta(seconds=1): "datetime.second",
|
||||||
timedelta(microseconds=1000): "millisecond",
|
timedelta(microseconds=1000): "datetime.millisecond",
|
||||||
timedelta(microseconds=1): "microsecond",
|
timedelta(microseconds=1): "datetime.microsecond",
|
||||||
}
|
}
|
||||||
count = 0
|
count = 0
|
||||||
out_list = []
|
out_list = []
|
||||||
for scale, scale_name in time_scale_names_dict.items():
|
for scale, scale_translation_key in time_scale_translation_keys_dict.items():
|
||||||
if t >= scale:
|
if t >= scale:
|
||||||
count += 1
|
count += 1
|
||||||
n = t / scale if count == max_components else int(t / scale)
|
n = t / scale if count == max_components else int(t / scale)
|
||||||
@ -436,7 +437,8 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places
|
|||||||
if n_txt[-2:] == ".0":
|
if n_txt[-2:] == ".0":
|
||||||
n_txt = n_txt[:-2]
|
n_txt = n_txt[:-2]
|
||||||
|
|
||||||
out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
|
scale_value = translator.t(scale_translation_key, count=n)
|
||||||
|
out_list.append(f"{n_txt} {scale_value}")
|
||||||
|
|
||||||
if out_list == []:
|
if out_list == []:
|
||||||
return "none"
|
return "none"
|
||||||
|
@ -2,6 +2,7 @@ import asyncio
|
|||||||
|
|
||||||
from pydantic import UUID4
|
from pydantic import UUID4
|
||||||
|
|
||||||
|
from mealie.lang.providers import Translator
|
||||||
from mealie.repos.repository_factory import AllRepositories
|
from mealie.repos.repository_factory import AllRepositories
|
||||||
from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe
|
from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe
|
||||||
from mealie.schema.reports.reports import (
|
from mealie.schema.reports.reports import (
|
||||||
@ -20,11 +21,14 @@ from mealie.services.scraper.scraper import create_from_url
|
|||||||
class RecipeBulkScraperService(BaseService):
|
class RecipeBulkScraperService(BaseService):
|
||||||
report_entries: list[ReportEntryCreate]
|
report_entries: list[ReportEntryCreate]
|
||||||
|
|
||||||
def __init__(self, service: RecipeService, repos: AllRepositories, group: GroupInDB) -> None:
|
def __init__(
|
||||||
|
self, service: RecipeService, repos: AllRepositories, group: GroupInDB, translator: Translator
|
||||||
|
) -> None:
|
||||||
self.service = service
|
self.service = service
|
||||||
self.repos = repos
|
self.repos = repos
|
||||||
self.group = group
|
self.group = group
|
||||||
self.report_entries = []
|
self.report_entries = []
|
||||||
|
self.translator = translator
|
||||||
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@ -81,7 +85,7 @@ class RecipeBulkScraperService(BaseService):
|
|||||||
async def _do(url: str) -> Recipe | None:
|
async def _do(url: str) -> Recipe | None:
|
||||||
async with sem:
|
async with sem:
|
||||||
try:
|
try:
|
||||||
recipe, _ = await create_from_url(url)
|
recipe, _ = await create_from_url(url, self.translator)
|
||||||
return recipe
|
return recipe
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.service.logger.error(f"failed to scrape url during bulk url import {url}")
|
self.service.logger.error(f"failed to scrape url during bulk url import {url}")
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
from mealie.lang.providers import Translator
|
||||||
from mealie.schema.recipe.recipe import Recipe
|
from mealie.schema.recipe.recipe import Recipe
|
||||||
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
||||||
|
|
||||||
@ -14,11 +15,12 @@ class RecipeScraper:
|
|||||||
# List of recipe scrapers. Note that order matters
|
# List of recipe scrapers. Note that order matters
|
||||||
scrapers: list[type[ABCScraperStrategy]]
|
scrapers: list[type[ABCScraperStrategy]]
|
||||||
|
|
||||||
def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
|
def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
|
||||||
if scrapers is None:
|
if scrapers is None:
|
||||||
scrapers = DEFAULT_SCRAPER_STRATEGIES
|
scrapers = DEFAULT_SCRAPER_STRATEGIES
|
||||||
|
|
||||||
self.scrapers = scrapers
|
self.scrapers = scrapers
|
||||||
|
self.translator = translator
|
||||||
|
|
||||||
async def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
async def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||||
"""
|
"""
|
||||||
@ -26,7 +28,7 @@ class RecipeScraper:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
for scraper_type in self.scrapers:
|
for scraper_type in self.scrapers:
|
||||||
scraper = scraper_type(url)
|
scraper = scraper_type(url, self.translator)
|
||||||
result = await scraper.parse()
|
result = await scraper.parse()
|
||||||
|
|
||||||
if result is not None:
|
if result is not None:
|
||||||
|
@ -5,6 +5,7 @@ from fastapi import HTTPException, status
|
|||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
|
||||||
from mealie.core.root_logger import get_logger
|
from mealie.core.root_logger import get_logger
|
||||||
|
from mealie.lang.providers import Translator
|
||||||
from mealie.pkgs import cache
|
from mealie.pkgs import cache
|
||||||
from mealie.schema.recipe import Recipe
|
from mealie.schema.recipe import Recipe
|
||||||
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
||||||
@ -19,7 +20,7 @@ class ParserErrors(str, Enum):
|
|||||||
CONNECTION_ERROR = "CONNECTION_ERROR"
|
CONNECTION_ERROR = "CONNECTION_ERROR"
|
||||||
|
|
||||||
|
|
||||||
async def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras | None]:
|
async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, ScrapedExtras | None]:
|
||||||
"""Main entry point for generating a recipe from a URL. Pass in a URL and
|
"""Main entry point for generating a recipe from a URL. Pass in a URL and
|
||||||
a Recipe object will be returned if successful.
|
a Recipe object will be returned if successful.
|
||||||
|
|
||||||
@ -29,7 +30,7 @@ async def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras | None]:
|
|||||||
Returns:
|
Returns:
|
||||||
Recipe: Recipe Object
|
Recipe: Recipe Object
|
||||||
"""
|
"""
|
||||||
scraper = RecipeScraper()
|
scraper = RecipeScraper(translator)
|
||||||
new_recipe, extras = await scraper.scrape(url)
|
new_recipe, extras = await scraper.scrape(url)
|
||||||
|
|
||||||
if not new_recipe:
|
if not new_recipe:
|
||||||
|
@ -11,6 +11,7 @@ from slugify import slugify
|
|||||||
from w3lib.html import get_base_url
|
from w3lib.html import get_base_url
|
||||||
|
|
||||||
from mealie.core.root_logger import get_logger
|
from mealie.core.root_logger import get_logger
|
||||||
|
from mealie.lang.providers import Translator
|
||||||
from mealie.schema.recipe.recipe import Recipe, RecipeStep
|
from mealie.schema.recipe.recipe import Recipe, RecipeStep
|
||||||
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
||||||
|
|
||||||
@ -77,9 +78,10 @@ class ABCScraperStrategy(ABC):
|
|||||||
|
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
def __init__(self, url: str) -> None:
|
def __init__(self, url: str, translator: Translator) -> None:
|
||||||
self.logger = get_logger()
|
self.logger = get_logger()
|
||||||
self.url = url
|
self.url = url
|
||||||
|
self.translator = translator
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def get_html(self, url: str) -> str: ...
|
async def get_html(self, url: str) -> str: ...
|
||||||
@ -102,7 +104,9 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
|||||||
return await safe_scrape_html(url)
|
return await safe_scrape_html(url)
|
||||||
|
|
||||||
def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
|
def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
|
||||||
def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
|
def try_get_default(
|
||||||
|
func_call: Callable | None, get_attr: str, default: Any, clean_func=None, **clean_func_kwargs
|
||||||
|
):
|
||||||
value = default
|
value = default
|
||||||
|
|
||||||
if func_call:
|
if func_call:
|
||||||
@ -118,7 +122,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
|||||||
self.logger.error(f"Error parsing recipe attribute '{get_attr}'")
|
self.logger.error(f"Error parsing recipe attribute '{get_attr}'")
|
||||||
|
|
||||||
if clean_func:
|
if clean_func:
|
||||||
value = clean_func(value)
|
value = clean_func(value, **clean_func_kwargs)
|
||||||
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
@ -138,9 +142,9 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
|||||||
except TypeError:
|
except TypeError:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
|
cook_time = try_get_default(
|
||||||
None, "cookTime", None, cleaner.clean_time
|
None, "performTime", None, cleaner.clean_time, translator=self.translator
|
||||||
)
|
) or try_get_default(None, "cookTime", None, cleaner.clean_time, translator=self.translator)
|
||||||
|
|
||||||
extras = ScrapedExtras()
|
extras = ScrapedExtras()
|
||||||
|
|
||||||
@ -157,8 +161,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
|||||||
scraped_data.ingredients, "recipeIngredient", [""], cleaner.clean_ingredients
|
scraped_data.ingredients, "recipeIngredient", [""], cleaner.clean_ingredients
|
||||||
),
|
),
|
||||||
recipe_instructions=get_instructions(),
|
recipe_instructions=get_instructions(),
|
||||||
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
|
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time, translator=self.translator),
|
||||||
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
|
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time, translator=self.translator),
|
||||||
perform_time=cook_time,
|
perform_time=cook_time,
|
||||||
org_url=url,
|
org_url=url,
|
||||||
)
|
)
|
||||||
|
@ -9,6 +9,29 @@ def test_json_provider():
|
|||||||
assert provider.t("test2", "DEFAULT") == "DEFAULT"
|
assert provider.t("test2", "DEFAULT") == "DEFAULT"
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_provider_plural():
|
||||||
|
provider = JsonProvider({"test": "test | tests"})
|
||||||
|
assert provider.t("test", count=0) == "tests"
|
||||||
|
assert provider.t("test", count=0.5) == "tests"
|
||||||
|
assert provider.t("test", count=1) == "test"
|
||||||
|
assert provider.t("test", count=1.5) == "tests"
|
||||||
|
assert provider.t("test", count=2) == "tests"
|
||||||
|
|
||||||
|
provider = JsonProvider({"test": "test 0 | test | tests"})
|
||||||
|
assert provider.t("test", count=0) == "test 0"
|
||||||
|
assert provider.t("test", count=0.5) == "tests"
|
||||||
|
assert provider.t("test", count=1) == "test"
|
||||||
|
assert provider.t("test", count=1.5) == "tests"
|
||||||
|
assert provider.t("test", count=2) == "tests"
|
||||||
|
|
||||||
|
provider = JsonProvider({"test": "zero tests | one test | {count} tests"})
|
||||||
|
assert provider.t("test", count=0) == "zero tests"
|
||||||
|
assert provider.t("test", count=0.5) == "0.5 tests"
|
||||||
|
assert provider.t("test", count=1) == "one test"
|
||||||
|
assert provider.t("test", count=1.5) == "1.5 tests"
|
||||||
|
assert provider.t("test", count=2) == "2 tests"
|
||||||
|
|
||||||
|
|
||||||
def test_json_provider_nested_keys():
|
def test_json_provider_nested_keys():
|
||||||
nested_dict = {
|
nested_dict = {
|
||||||
"root": {
|
"root": {
|
||||||
|
@ -4,6 +4,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from mealie.lang.providers import local_provider
|
||||||
from mealie.services.scraper import cleaner
|
from mealie.services.scraper import cleaner
|
||||||
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
|
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
|
||||||
from tests import data as test_data
|
from tests import data as test_data
|
||||||
@ -37,15 +38,17 @@ test_cleaner_data = [
|
|||||||
|
|
||||||
@pytest.mark.parametrize("json_file,num_steps", test_cleaner_data)
|
@pytest.mark.parametrize("json_file,num_steps", test_cleaner_data)
|
||||||
def test_cleaner_clean(json_file: Path, num_steps):
|
def test_cleaner_clean(json_file: Path, num_steps):
|
||||||
recipe_data = cleaner.clean(json.loads(json_file.read_text()))
|
translator = local_provider()
|
||||||
|
recipe_data = cleaner.clean(json.loads(json_file.read_text()), translator)
|
||||||
assert len(recipe_data["recipeInstructions"]) == num_steps
|
assert len(recipe_data["recipeInstructions"]) == num_steps
|
||||||
|
|
||||||
|
|
||||||
def test_html_with_recipe_data():
|
def test_html_with_recipe_data():
|
||||||
path = test_data.html_healthy_pasta_bake_60759
|
path = test_data.html_healthy_pasta_bake_60759
|
||||||
url = "https://www.bbc.co.uk/food/recipes/healthy_pasta_bake_60759"
|
url = "https://www.bbc.co.uk/food/recipes/healthy_pasta_bake_60759"
|
||||||
|
translator = local_provider()
|
||||||
|
|
||||||
open_graph_strategy = RecipeScraperOpenGraph(url)
|
open_graph_strategy = RecipeScraperOpenGraph(url, translator)
|
||||||
|
|
||||||
recipe_data = open_graph_strategy.get_recipe_fields(path.read_text())
|
recipe_data = open_graph_strategy.get_recipe_fields(path.read_text())
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ from typing import Any
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from mealie.lang.providers import local_provider
|
||||||
from mealie.services.scraper import cleaner
|
from mealie.services.scraper import cleaner
|
||||||
|
|
||||||
|
|
||||||
@ -324,32 +325,32 @@ time_test_cases = (
|
|||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta",
|
test_id="timedelta",
|
||||||
input=timedelta(minutes=30),
|
input=timedelta(minutes=30),
|
||||||
expected="30 Minutes",
|
expected="30 minutes",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (1)",
|
test_id="timedelta string (1)",
|
||||||
input="PT2H30M",
|
input="PT2H30M",
|
||||||
expected="2 Hours 30 Minutes",
|
expected="2 hours 30 minutes",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (2)",
|
test_id="timedelta string (2)",
|
||||||
input="PT30M",
|
input="PT30M",
|
||||||
expected="30 Minutes",
|
expected="30 minutes",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (3)",
|
test_id="timedelta string (3)",
|
||||||
input="PT2H",
|
input="PT2H",
|
||||||
expected="2 Hours",
|
expected="2 hours",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (4)",
|
test_id="timedelta string (4)",
|
||||||
input="P1DT1H1M1S",
|
input="P1DT1H1M1S",
|
||||||
expected="1 day 1 Hour 1 Minute 1 Second",
|
expected="1 day 1 hour 1 minute 1 second",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (4)",
|
test_id="timedelta string (4)",
|
||||||
input="P1DT1H1M1.53S",
|
input="P1DT1H1M1.53S",
|
||||||
expected="1 day 1 Hour 1 Minute 1 Second",
|
expected="1 day 1 hour 1 minute 1 second",
|
||||||
),
|
),
|
||||||
CleanerCase(
|
CleanerCase(
|
||||||
test_id="timedelta string (5) invalid",
|
test_id="timedelta string (5) invalid",
|
||||||
@ -366,7 +367,8 @@ time_test_cases = (
|
|||||||
|
|
||||||
@pytest.mark.parametrize("case", time_test_cases, ids=(x.test_id for x in time_test_cases))
|
@pytest.mark.parametrize("case", time_test_cases, ids=(x.test_id for x in time_test_cases))
|
||||||
def test_cleaner_clean_time(case: CleanerCase):
|
def test_cleaner_clean_time(case: CleanerCase):
|
||||||
result = cleaner.clean_time(case.input)
|
translator = local_provider()
|
||||||
|
result = cleaner.clean_time(case.input, translator)
|
||||||
assert case.expected == result
|
assert case.expected == result
|
||||||
|
|
||||||
|
|
||||||
@ -536,10 +538,11 @@ def test_cleaner_clean_nutrition(case: CleanerCase):
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"t,max_components,max_decimal_places,expected",
|
"t,max_components,max_decimal_places,expected",
|
||||||
[
|
[
|
||||||
(timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
|
(timedelta(days=2, seconds=17280), None, 2, "2 days 4 hours 48 minutes"),
|
||||||
(timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
|
(timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
|
||||||
(timedelta(days=365), None, 2, "1 year"),
|
(timedelta(days=365), None, 2, "1 year"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
|
def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
|
||||||
assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected
|
translator = local_provider()
|
||||||
|
assert cleaner.pretty_print_timedelta(t, translator, max_components, max_decimal_places) == expected
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from mealie.lang.providers import local_provider
|
||||||
from mealie.services.scraper import scraper
|
from mealie.services.scraper import scraper
|
||||||
from tests.utils.recipe_data import RecipeSiteTestCase, get_recipe_test_cases
|
from tests.utils.recipe_data import RecipeSiteTestCase, get_recipe_test_cases
|
||||||
|
|
||||||
@ -18,9 +19,10 @@ and then use this test case by removing the `@pytest.mark.skip` and than testing
|
|||||||
@pytest.mark.parametrize("recipe_test_data", test_cases)
|
@pytest.mark.parametrize("recipe_test_data", test_cases)
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
|
async def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
|
||||||
recipe, _ = await scraper.create_from_url(recipe_test_data.url)
|
translator = local_provider()
|
||||||
|
recipe, _ = await scraper.create_from_url(recipe_test_data.url, translator)
|
||||||
|
|
||||||
assert recipe.slug == recipe_test_data.expected_slug
|
assert recipe.slug == recipe_test_data.expected_slug
|
||||||
assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
|
assert len(recipe.recipe_instructions or []) == recipe_test_data.num_steps
|
||||||
assert len(recipe.recipe_ingredient) == recipe_test_data.num_ingredients
|
assert len(recipe.recipe_ingredient) == recipe_test_data.num_ingredients
|
||||||
assert recipe.org_url == recipe_test_data.url
|
assert recipe.org_url == recipe_test_data.url
|
||||||
|
Loading…
x
Reference in New Issue
Block a user